Home | History | Annotate | Download | only in x86_64
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
     18 #define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
     19 
     20 #include <vector>
     21 
     22 #include "base/arena_containers.h"
     23 #include "base/array_ref.h"
     24 #include "base/bit_utils.h"
     25 #include "base/macros.h"
     26 #include "constants_x86_64.h"
     27 #include "globals.h"
     28 #include "managed_register_x86_64.h"
     29 #include "offsets.h"
     30 #include "utils/assembler.h"
     31 #include "utils/jni_macro_assembler.h"
     32 
     33 namespace art {
     34 namespace x86_64 {
     35 
     36 // Encodes an immediate value for operands.
     37 //
     38 // Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
     39 // to 32b.
     40 //
     41 // Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
     42 // conversion rules in expressions regarding negation, especially size_t on 32b.
     43 class Immediate : public ValueObject {
     44  public:
     45   explicit Immediate(int64_t value_in) : value_(value_in) {}
     46 
     47   int64_t value() const { return value_; }
     48 
     49   bool is_int8() const { return IsInt<8>(value_); }
     50   bool is_uint8() const { return IsUint<8>(value_); }
     51   bool is_int16() const { return IsInt<16>(value_); }
     52   bool is_uint16() const { return IsUint<16>(value_); }
     53   bool is_int32() const { return IsInt<32>(value_); }
     54 
     55  private:
     56   const int64_t value_;
     57 };
     58 
     59 
     60 class Operand : public ValueObject {
     61  public:
     62   uint8_t mod() const {
     63     return (encoding_at(0) >> 6) & 3;
     64   }
     65 
     66   Register rm() const {
     67     return static_cast<Register>(encoding_at(0) & 7);
     68   }
     69 
     70   ScaleFactor scale() const {
     71     return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
     72   }
     73 
     74   Register index() const {
     75     return static_cast<Register>((encoding_at(1) >> 3) & 7);
     76   }
     77 
     78   Register base() const {
     79     return static_cast<Register>(encoding_at(1) & 7);
     80   }
     81 
     82   uint8_t rex() const {
     83     return rex_;
     84   }
     85 
     86   int8_t disp8() const {
     87     CHECK_GE(length_, 2);
     88     return static_cast<int8_t>(encoding_[length_ - 1]);
     89   }
     90 
     91   int32_t disp32() const {
     92     CHECK_GE(length_, 5);
     93     int32_t value;
     94     memcpy(&value, &encoding_[length_ - 4], sizeof(value));
     95     return value;
     96   }
     97 
     98   bool IsRegister(CpuRegister reg) const {
     99     return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
    100         && ((encoding_[0] & 0x07) == reg.LowBits())  // Register codes match.
    101         && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
    102   }
    103 
    104   AssemblerFixup* GetFixup() const {
    105     return fixup_;
    106   }
    107 
    108  protected:
    109   // Operand can be sub classed (e.g: Address).
    110   Operand() : rex_(0), length_(0), fixup_(nullptr) { }
    111 
    112   void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
    113     CHECK_EQ(mod_in & ~3, 0);
    114     if (rm_in.NeedsRex()) {
    115       rex_ |= 0x41;  // REX.000B
    116     }
    117     encoding_[0] = (mod_in << 6) | rm_in.LowBits();
    118     length_ = 1;
    119   }
    120 
    121   void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
    122     CHECK_EQ(length_, 1);
    123     CHECK_EQ(scale_in & ~3, 0);
    124     if (base_in.NeedsRex()) {
    125       rex_ |= 0x41;  // REX.000B
    126     }
    127     if (index_in.NeedsRex()) {
    128       rex_ |= 0x42;  // REX.00X0
    129     }
    130     encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
    131         static_cast<uint8_t>(base_in.LowBits());
    132     length_ = 2;
    133   }
    134 
    135   void SetDisp8(int8_t disp) {
    136     CHECK(length_ == 1 || length_ == 2);
    137     encoding_[length_++] = static_cast<uint8_t>(disp);
    138   }
    139 
    140   void SetDisp32(int32_t disp) {
    141     CHECK(length_ == 1 || length_ == 2);
    142     int disp_size = sizeof(disp);
    143     memmove(&encoding_[length_], &disp, disp_size);
    144     length_ += disp_size;
    145   }
    146 
    147   void SetFixup(AssemblerFixup* fixup) {
    148     fixup_ = fixup;
    149   }
    150 
    151  private:
    152   uint8_t rex_;
    153   uint8_t length_;
    154   uint8_t encoding_[6];
    155   AssemblerFixup* fixup_;
    156 
    157   explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
    158 
    159   // Get the operand encoding byte at the given index.
    160   uint8_t encoding_at(int index_in) const {
    161     CHECK_GE(index_in, 0);
    162     CHECK_LT(index_in, length_);
    163     return encoding_[index_in];
    164   }
    165 
    166   friend class X86_64Assembler;
    167 };
    168 
    169 
    170 class Address : public Operand {
    171  public:
    172   Address(CpuRegister base_in, int32_t disp) {
    173     Init(base_in, disp);
    174   }
    175 
    176   Address(CpuRegister base_in, Offset disp) {
    177     Init(base_in, disp.Int32Value());
    178   }
    179 
    180   Address(CpuRegister base_in, FrameOffset disp) {
    181     CHECK_EQ(base_in.AsRegister(), RSP);
    182     Init(CpuRegister(RSP), disp.Int32Value());
    183   }
    184 
    185   Address(CpuRegister base_in, MemberOffset disp) {
    186     Init(base_in, disp.Int32Value());
    187   }
    188 
    189   void Init(CpuRegister base_in, int32_t disp) {
    190     if (disp == 0 && base_in.LowBits() != RBP) {
    191       SetModRM(0, base_in);
    192       if (base_in.LowBits() == RSP) {
    193         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
    194       }
    195     } else if (disp >= -128 && disp <= 127) {
    196       SetModRM(1, base_in);
    197       if (base_in.LowBits() == RSP) {
    198         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
    199       }
    200       SetDisp8(disp);
    201     } else {
    202       SetModRM(2, base_in);
    203       if (base_in.LowBits() == RSP) {
    204         SetSIB(TIMES_1, CpuRegister(RSP), base_in);
    205       }
    206       SetDisp32(disp);
    207     }
    208   }
    209 
    210 
    211   Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    212     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    213     SetModRM(0, CpuRegister(RSP));
    214     SetSIB(scale_in, index_in, CpuRegister(RBP));
    215     SetDisp32(disp);
    216   }
    217 
    218   Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    219     CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    220     if (disp == 0 && base_in.LowBits() != RBP) {
    221       SetModRM(0, CpuRegister(RSP));
    222       SetSIB(scale_in, index_in, base_in);
    223     } else if (disp >= -128 && disp <= 127) {
    224       SetModRM(1, CpuRegister(RSP));
    225       SetSIB(scale_in, index_in, base_in);
    226       SetDisp8(disp);
    227     } else {
    228       SetModRM(2, CpuRegister(RSP));
    229       SetSIB(scale_in, index_in, base_in);
    230       SetDisp32(disp);
    231     }
    232   }
    233 
    234   // If no_rip is true then the Absolute address isn't RIP relative.
    235   static Address Absolute(uintptr_t addr, bool no_rip = false) {
    236     Address result;
    237     if (no_rip) {
    238       result.SetModRM(0, CpuRegister(RSP));
    239       result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
    240       result.SetDisp32(addr);
    241     } else {
    242       // RIP addressing is done using RBP as the base register.
    243       // The value in RBP isn't used.  Instead the offset is added to RIP.
    244       result.SetModRM(0, CpuRegister(RBP));
    245       result.SetDisp32(addr);
    246     }
    247     return result;
    248   }
    249 
    250   // An RIP relative address that will be fixed up later.
    251   static Address RIP(AssemblerFixup* fixup) {
    252     Address result;
    253     // RIP addressing is done using RBP as the base register.
    254     // The value in RBP isn't used.  Instead the offset is added to RIP.
    255     result.SetModRM(0, CpuRegister(RBP));
    256     result.SetDisp32(0);
    257     result.SetFixup(fixup);
    258     return result;
    259   }
    260 
    261   // If no_rip is true then the Absolute address isn't RIP relative.
    262   static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
    263     return Absolute(addr.Int32Value(), no_rip);
    264   }
    265 
    266  private:
    267   Address() {}
    268 };
    269 
    270 
    271 /**
    272  * Class to handle constant area values.
    273  */
    274 class ConstantArea {
    275  public:
    276   explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}
    277 
    278   // Add a double to the constant area, returning the offset into
    279   // the constant area where the literal resides.
    280   size_t AddDouble(double v);
    281 
    282   // Add a float to the constant area, returning the offset into
    283   // the constant area where the literal resides.
    284   size_t AddFloat(float v);
    285 
    286   // Add an int32_t to the constant area, returning the offset into
    287   // the constant area where the literal resides.
    288   size_t AddInt32(int32_t v);
    289 
    290   // Add an int32_t to the end of the constant area, returning the offset into
    291   // the constant area where the literal resides.
    292   size_t AppendInt32(int32_t v);
    293 
    294   // Add an int64_t to the constant area, returning the offset into
    295   // the constant area where the literal resides.
    296   size_t AddInt64(int64_t v);
    297 
    298   size_t GetSize() const {
    299     return buffer_.size() * elem_size_;
    300   }
    301 
    302   ArrayRef<const int32_t> GetBuffer() const {
    303     return ArrayRef<const int32_t>(buffer_);
    304   }
    305 
    306  private:
    307   static constexpr size_t elem_size_ = sizeof(int32_t);
    308   ArenaVector<int32_t> buffer_;
    309 };
    310 
    311 
    312 // This is equivalent to the Label class, used in a slightly different context. We
    313 // inherit the functionality of the Label class, but prevent unintended
    314 // derived-to-base conversions by making the base class private.
    315 class NearLabel : private Label {
    316  public:
    317   NearLabel() : Label() {}
    318 
    319   // Expose the Label routines that we need.
    320   using Label::Position;
    321   using Label::LinkPosition;
    322   using Label::IsBound;
    323   using Label::IsUnused;
    324   using Label::IsLinked;
    325 
    326  private:
    327   using Label::BindTo;
    328   using Label::LinkTo;
    329 
    330   friend class x86_64::X86_64Assembler;
    331 
    332   DISALLOW_COPY_AND_ASSIGN(NearLabel);
    333 };
    334 
    335 
    336 class X86_64Assembler FINAL : public Assembler {
    337  public:
    338   explicit X86_64Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
    339   virtual ~X86_64Assembler() {}
    340 
    341   /*
    342    * Emit Machine Instructions.
    343    */
    344   void call(CpuRegister reg);
    345   void call(const Address& address);
    346   void call(Label* label);
    347 
    348   void pushq(CpuRegister reg);
    349   void pushq(const Address& address);
    350   void pushq(const Immediate& imm);
    351 
    352   void popq(CpuRegister reg);
    353   void popq(const Address& address);
    354 
    355   void movq(CpuRegister dst, const Immediate& src);
    356   void movl(CpuRegister dst, const Immediate& src);
    357   void movq(CpuRegister dst, CpuRegister src);
    358   void movl(CpuRegister dst, CpuRegister src);
    359 
    360   void movntl(const Address& dst, CpuRegister src);
    361   void movntq(const Address& dst, CpuRegister src);
    362 
    363   void movq(CpuRegister dst, const Address& src);
    364   void movl(CpuRegister dst, const Address& src);
    365   void movq(const Address& dst, CpuRegister src);
    366   void movq(const Address& dst, const Immediate& imm);
    367   void movl(const Address& dst, CpuRegister src);
    368   void movl(const Address& dst, const Immediate& imm);
    369 
    370   void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
    371   void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
    372   void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
    373 
    374   void movzxb(CpuRegister dst, CpuRegister src);
    375   void movzxb(CpuRegister dst, const Address& src);
    376   void movsxb(CpuRegister dst, CpuRegister src);
    377   void movsxb(CpuRegister dst, const Address& src);
    378   void movb(CpuRegister dst, const Address& src);
    379   void movb(const Address& dst, CpuRegister src);
    380   void movb(const Address& dst, const Immediate& imm);
    381 
    382   void movzxw(CpuRegister dst, CpuRegister src);
    383   void movzxw(CpuRegister dst, const Address& src);
    384   void movsxw(CpuRegister dst, CpuRegister src);
    385   void movsxw(CpuRegister dst, const Address& src);
    386   void movw(CpuRegister dst, const Address& src);
    387   void movw(const Address& dst, CpuRegister src);
    388   void movw(const Address& dst, const Immediate& imm);
    389 
    390   void leaq(CpuRegister dst, const Address& src);
    391   void leal(CpuRegister dst, const Address& src);
    392 
    393   void movaps(XmmRegister dst, XmmRegister src);     // move
    394   void movaps(XmmRegister dst, const Address& src);  // load aligned
    395   void movups(XmmRegister dst, const Address& src);  // load unaligned
    396   void movaps(const Address& dst, XmmRegister src);  // store aligned
    397   void movups(const Address& dst, XmmRegister src);  // store unaligned
    398 
    399   void movss(XmmRegister dst, const Address& src);
    400   void movss(const Address& dst, XmmRegister src);
    401   void movss(XmmRegister dst, XmmRegister src);
    402 
    403   void movsxd(CpuRegister dst, CpuRegister src);
    404   void movsxd(CpuRegister dst, const Address& src);
    405 
    406   void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
    407   void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
    408   void movd(XmmRegister dst, CpuRegister src, bool is64bit);
    409   void movd(CpuRegister dst, XmmRegister src, bool is64bit);
    410 
    411   void addss(XmmRegister dst, XmmRegister src);
    412   void addss(XmmRegister dst, const Address& src);
    413   void subss(XmmRegister dst, XmmRegister src);
    414   void subss(XmmRegister dst, const Address& src);
    415   void mulss(XmmRegister dst, XmmRegister src);
    416   void mulss(XmmRegister dst, const Address& src);
    417   void divss(XmmRegister dst, XmmRegister src);
    418   void divss(XmmRegister dst, const Address& src);
    419 
    420   void addps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    421   void subps(XmmRegister dst, XmmRegister src);
    422   void mulps(XmmRegister dst, XmmRegister src);
    423   void divps(XmmRegister dst, XmmRegister src);
    424 
    425   void movapd(XmmRegister dst, XmmRegister src);     // move
    426   void movapd(XmmRegister dst, const Address& src);  // load aligned
    427   void movupd(XmmRegister dst, const Address& src);  // load unaligned
    428   void movapd(const Address& dst, XmmRegister src);  // store aligned
    429   void movupd(const Address& dst, XmmRegister src);  // store unaligned
    430 
    431   void movsd(XmmRegister dst, const Address& src);
    432   void movsd(const Address& dst, XmmRegister src);
    433   void movsd(XmmRegister dst, XmmRegister src);
    434 
    435   void addsd(XmmRegister dst, XmmRegister src);
    436   void addsd(XmmRegister dst, const Address& src);
    437   void subsd(XmmRegister dst, XmmRegister src);
    438   void subsd(XmmRegister dst, const Address& src);
    439   void mulsd(XmmRegister dst, XmmRegister src);
    440   void mulsd(XmmRegister dst, const Address& src);
    441   void divsd(XmmRegister dst, XmmRegister src);
    442   void divsd(XmmRegister dst, const Address& src);
    443 
    444   void addpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    445   void subpd(XmmRegister dst, XmmRegister src);
    446   void mulpd(XmmRegister dst, XmmRegister src);
    447   void divpd(XmmRegister dst, XmmRegister src);
    448 
    449   void movdqa(XmmRegister dst, XmmRegister src);     // move
    450   void movdqa(XmmRegister dst, const Address& src);  // load aligned
    451   void movdqu(XmmRegister dst, const Address& src);  // load unaligned
    452   void movdqa(const Address& dst, XmmRegister src);  // store aligned
    453   void movdqu(const Address& dst, XmmRegister src);  // store unaligned
    454 
    455   void paddb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    456   void psubb(XmmRegister dst, XmmRegister src);
    457 
    458   void paddw(XmmRegister dst, XmmRegister src);
    459   void psubw(XmmRegister dst, XmmRegister src);
    460   void pmullw(XmmRegister dst, XmmRegister src);
    461 
    462   void paddd(XmmRegister dst, XmmRegister src);
    463   void psubd(XmmRegister dst, XmmRegister src);
    464   void pmulld(XmmRegister dst, XmmRegister src);
    465 
    466   void paddq(XmmRegister dst, XmmRegister src);
    467   void psubq(XmmRegister dst, XmmRegister src);
    468 
    469   void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
    470   void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
    471   void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
    472   void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
    473   void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
    474   void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);
    475 
    476   void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    477   void cvtss2sd(XmmRegister dst, XmmRegister src);
    478   void cvtss2sd(XmmRegister dst, const Address& src);
    479 
    480   void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    481   void cvtsd2ss(XmmRegister dst, XmmRegister src);
    482   void cvtsd2ss(XmmRegister dst, const Address& src);
    483 
    484   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    485   void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
    486   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
    487   void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
    488 
    489   void cvtdq2ps(XmmRegister dst, XmmRegister src);
    490   void cvtdq2pd(XmmRegister dst, XmmRegister src);
    491 
    492   void comiss(XmmRegister a, XmmRegister b);
    493   void comiss(XmmRegister a, const Address& b);
    494   void comisd(XmmRegister a, XmmRegister b);
    495   void comisd(XmmRegister a, const Address& b);
    496   void ucomiss(XmmRegister a, XmmRegister b);
    497   void ucomiss(XmmRegister a, const Address& b);
    498   void ucomisd(XmmRegister a, XmmRegister b);
    499   void ucomisd(XmmRegister a, const Address& b);
    500 
    501   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
    502   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
    503 
    504   void sqrtsd(XmmRegister dst, XmmRegister src);
    505   void sqrtss(XmmRegister dst, XmmRegister src);
    506 
    507   void xorpd(XmmRegister dst, const Address& src);
    508   void xorpd(XmmRegister dst, XmmRegister src);
    509   void xorps(XmmRegister dst, const Address& src);
    510   void xorps(XmmRegister dst, XmmRegister src);
    511   void pxor(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    512 
    513   void andpd(XmmRegister dst, const Address& src);
    514   void andpd(XmmRegister dst, XmmRegister src);
    515   void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    516   void pand(XmmRegister dst, XmmRegister src);
    517 
    518   void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    519   void andnps(XmmRegister dst, XmmRegister src);
    520   void pandn(XmmRegister dst, XmmRegister src);
    521 
    522   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    523   void orps(XmmRegister dst, XmmRegister src);
    524   void por(XmmRegister dst, XmmRegister src);
    525 
    526   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    527   void pavgw(XmmRegister dst, XmmRegister src);
    528 
    529   void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    530   void pmaxsb(XmmRegister dst, XmmRegister src);
    531   void pminsw(XmmRegister dst, XmmRegister src);
    532   void pmaxsw(XmmRegister dst, XmmRegister src);
    533   void pminsd(XmmRegister dst, XmmRegister src);
    534   void pmaxsd(XmmRegister dst, XmmRegister src);
    535 
    536   void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    537   void pmaxub(XmmRegister dst, XmmRegister src);
    538   void pminuw(XmmRegister dst, XmmRegister src);
    539   void pmaxuw(XmmRegister dst, XmmRegister src);
    540   void pminud(XmmRegister dst, XmmRegister src);
    541   void pmaxud(XmmRegister dst, XmmRegister src);
    542 
    543   void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
    544   void maxps(XmmRegister dst, XmmRegister src);
    545   void minpd(XmmRegister dst, XmmRegister src);
    546   void maxpd(XmmRegister dst, XmmRegister src);
    547 
    548   void pcmpeqb(XmmRegister dst, XmmRegister src);
    549   void pcmpeqw(XmmRegister dst, XmmRegister src);
    550   void pcmpeqd(XmmRegister dst, XmmRegister src);
    551   void pcmpeqq(XmmRegister dst, XmmRegister src);
    552 
    553   void pcmpgtb(XmmRegister dst, XmmRegister src);
    554   void pcmpgtw(XmmRegister dst, XmmRegister src);
    555   void pcmpgtd(XmmRegister dst, XmmRegister src);
    556   void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
    557 
    558   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
    559   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
    560   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
    561 
    562   void punpcklbw(XmmRegister dst, XmmRegister src);
    563   void punpcklwd(XmmRegister dst, XmmRegister src);
    564   void punpckldq(XmmRegister dst, XmmRegister src);
    565   void punpcklqdq(XmmRegister dst, XmmRegister src);
    566 
    567   void psllw(XmmRegister reg, const Immediate& shift_count);
    568   void pslld(XmmRegister reg, const Immediate& shift_count);
    569   void psllq(XmmRegister reg, const Immediate& shift_count);
    570 
    571   void psraw(XmmRegister reg, const Immediate& shift_count);
    572   void psrad(XmmRegister reg, const Immediate& shift_count);
    573   // no psraq
    574 
    575   void psrlw(XmmRegister reg, const Immediate& shift_count);
    576   void psrld(XmmRegister reg, const Immediate& shift_count);
    577   void psrlq(XmmRegister reg, const Immediate& shift_count);
    578 
    579   void flds(const Address& src);
    580   void fstps(const Address& dst);
    581   void fsts(const Address& dst);
    582 
    583   void fldl(const Address& src);
    584   void fstpl(const Address& dst);
    585   void fstl(const Address& dst);
    586 
    587   void fstsw();
    588 
    589   void fucompp();
    590 
    591   void fnstcw(const Address& dst);
    592   void fldcw(const Address& src);
    593 
    594   void fistpl(const Address& dst);
    595   void fistps(const Address& dst);
    596   void fildl(const Address& src);
    597   void filds(const Address& src);
    598 
    599   void fincstp();
    600   void ffree(const Immediate& index);
    601 
    602   void fsin();
    603   void fcos();
    604   void fptan();
    605   void fprem();
    606 
    607   void xchgl(CpuRegister dst, CpuRegister src);
    608   void xchgq(CpuRegister dst, CpuRegister src);
    609   void xchgl(CpuRegister reg, const Address& address);
    610 
    611   void cmpb(const Address& address, const Immediate& imm);
    612   void cmpw(const Address& address, const Immediate& imm);
    613 
    614   void cmpl(CpuRegister reg, const Immediate& imm);
    615   void cmpl(CpuRegister reg0, CpuRegister reg1);
    616   void cmpl(CpuRegister reg, const Address& address);
    617   void cmpl(const Address& address, CpuRegister reg);
    618   void cmpl(const Address& address, const Immediate& imm);
    619 
    620   void cmpq(CpuRegister reg0, CpuRegister reg1);
    621   void cmpq(CpuRegister reg0, const Immediate& imm);
    622   void cmpq(CpuRegister reg0, const Address& address);
    623   void cmpq(const Address& address, const Immediate& imm);
    624 
    625   void testl(CpuRegister reg1, CpuRegister reg2);
    626   void testl(CpuRegister reg, const Address& address);
    627   void testl(CpuRegister reg, const Immediate& imm);
    628 
    629   void testq(CpuRegister reg1, CpuRegister reg2);
    630   void testq(CpuRegister reg, const Address& address);
    631 
    632   void testb(const Address& address, const Immediate& imm);
    633   void testl(const Address& address, const Immediate& imm);
    634 
    635   void andl(CpuRegister dst, const Immediate& imm);
    636   void andl(CpuRegister dst, CpuRegister src);
    637   void andl(CpuRegister reg, const Address& address);
    638   void andq(CpuRegister dst, const Immediate& imm);
    639   void andq(CpuRegister dst, CpuRegister src);
    640   void andq(CpuRegister reg, const Address& address);
    641 
    642   void orl(CpuRegister dst, const Immediate& imm);
    643   void orl(CpuRegister dst, CpuRegister src);
    644   void orl(CpuRegister reg, const Address& address);
    645   void orq(CpuRegister dst, CpuRegister src);
    646   void orq(CpuRegister dst, const Immediate& imm);
    647   void orq(CpuRegister reg, const Address& address);
    648 
    649   void xorl(CpuRegister dst, CpuRegister src);
    650   void xorl(CpuRegister dst, const Immediate& imm);
    651   void xorl(CpuRegister reg, const Address& address);
    652   void xorq(CpuRegister dst, const Immediate& imm);
    653   void xorq(CpuRegister dst, CpuRegister src);
    654   void xorq(CpuRegister reg, const Address& address);
    655 
    656   void addl(CpuRegister dst, CpuRegister src);
    657   void addl(CpuRegister reg, const Immediate& imm);
    658   void addl(CpuRegister reg, const Address& address);
    659   void addl(const Address& address, CpuRegister reg);
    660   void addl(const Address& address, const Immediate& imm);
    661 
    662   void addq(CpuRegister reg, const Immediate& imm);
    663   void addq(CpuRegister dst, CpuRegister src);
    664   void addq(CpuRegister dst, const Address& address);
    665 
    666   void subl(CpuRegister dst, CpuRegister src);
    667   void subl(CpuRegister reg, const Immediate& imm);
    668   void subl(CpuRegister reg, const Address& address);
    669 
    670   void subq(CpuRegister reg, const Immediate& imm);
    671   void subq(CpuRegister dst, CpuRegister src);
    672   void subq(CpuRegister dst, const Address& address);
    673 
    674   void cdq();
    675   void cqo();
    676 
    677   void idivl(CpuRegister reg);
    678   void idivq(CpuRegister reg);
    679 
    680   void imull(CpuRegister dst, CpuRegister src);
    681   void imull(CpuRegister reg, const Immediate& imm);
    682   void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
    683   void imull(CpuRegister reg, const Address& address);
    684 
    685   void imulq(CpuRegister src);
    686   void imulq(CpuRegister dst, CpuRegister src);
    687   void imulq(CpuRegister reg, const Immediate& imm);
    688   void imulq(CpuRegister reg, const Address& address);
    689   void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
    690 
    691   void imull(CpuRegister reg);
    692   void imull(const Address& address);
    693 
    694   void mull(CpuRegister reg);
    695   void mull(const Address& address);
    696 
    697   void shll(CpuRegister reg, const Immediate& imm);
    698   void shll(CpuRegister operand, CpuRegister shifter);
    699   void shrl(CpuRegister reg, const Immediate& imm);
    700   void shrl(CpuRegister operand, CpuRegister shifter);
    701   void sarl(CpuRegister reg, const Immediate& imm);
    702   void sarl(CpuRegister operand, CpuRegister shifter);
    703 
    704   void shlq(CpuRegister reg, const Immediate& imm);
    705   void shlq(CpuRegister operand, CpuRegister shifter);
    706   void shrq(CpuRegister reg, const Immediate& imm);
    707   void shrq(CpuRegister operand, CpuRegister shifter);
    708   void sarq(CpuRegister reg, const Immediate& imm);
    709   void sarq(CpuRegister operand, CpuRegister shifter);
    710 
    711   void negl(CpuRegister reg);
    712   void negq(CpuRegister reg);
    713 
    714   void notl(CpuRegister reg);
    715   void notq(CpuRegister reg);
    716 
    717   void enter(const Immediate& imm);
    718   void leave();
    719 
    720   void ret();
    721   void ret(const Immediate& imm);
    722 
    723   void nop();
    724   void int3();
    725   void hlt();
    726 
    727   void j(Condition condition, Label* label);
    728   void j(Condition condition, NearLabel* label);
    729   void jrcxz(NearLabel* label);
    730 
    731   void jmp(CpuRegister reg);
    732   void jmp(const Address& address);
    733   void jmp(Label* label);
    734   void jmp(NearLabel* label);
    735 
    736   X86_64Assembler* lock();
    737   void cmpxchgl(const Address& address, CpuRegister reg);
    738   void cmpxchgq(const Address& address, CpuRegister reg);
    739 
    740   void mfence();
    741 
    742   X86_64Assembler* gs();
    743 
          // setcc takes the Condition to test and the destination register.
    744   void setcc(Condition condition, CpuRegister dst);
    745 
          // Byte swap of a single register, operating in place on `dst`.
    746   void bswapl(CpuRegister dst);
    747   void bswapq(CpuRegister dst);
    748 
          // Bit-scan and population-count declarations. Each mnemonic has a
          // register-source and an Address-source overload; the destination is always a
          // CpuRegister.
          // NOTE(review): names follow the x86 BSF/BSR/POPCNT mnemonics with `l`/`q`
          // width suffixes; definitions are not in this header.
    749   void bsfl(CpuRegister dst, CpuRegister src);
    750   void bsfl(CpuRegister dst, const Address& src);
    751   void bsfq(CpuRegister dst, CpuRegister src);
    752   void bsfq(CpuRegister dst, const Address& src);
    753 
    754   void bsrl(CpuRegister dst, CpuRegister src);
    755   void bsrl(CpuRegister dst, const Address& src);
    756   void bsrq(CpuRegister dst, CpuRegister src);
    757   void bsrq(CpuRegister dst, const Address& src);
    758 
    759   void popcntl(CpuRegister dst, CpuRegister src);
    760   void popcntl(CpuRegister dst, const Address& src);
    761   void popcntq(CpuRegister dst, CpuRegister src);
    762   void popcntq(CpuRegister dst, const Address& src);
    763 
          // Rotate declarations. They mirror the shift declarations: one overload takes the
          // count as an Immediate, the other takes it in a CpuRegister named `shifter`.
          // NOTE(review): presumably ROR/ROL in 32-bit (`l`) and 64-bit (`q`) widths.
    764   void rorl(CpuRegister reg, const Immediate& imm);
    765   void rorl(CpuRegister operand, CpuRegister shifter);
    766   void roll(CpuRegister reg, const Immediate& imm);
    767   void roll(CpuRegister operand, CpuRegister shifter);
    768 
    769   void rorq(CpuRegister reg, const Immediate& imm);
    770   void rorq(CpuRegister operand, CpuRegister shifter);
    771   void rolq(CpuRegister reg, const Immediate& imm);
    772   void rolq(CpuRegister operand, CpuRegister shifter);
    773 
          // Repeat-prefixed string instructions. None of them takes an explicit operand.
          // NOTE(review): per the x86 mnemonics these would use implicit registers
          // (e.g. RCX as the count, RSI/RDI as pointers); callers presumably set those up --
          // confirm against the definitions, which are not in this header.
    774   void repne_scasb();
    775   void repne_scasw();
    776   void repe_cmpsw();
    777   void repe_cmpsl();
    778   void repe_cmpsq();
    779   void rep_movsw();
    780 
    781   //
    782   // Macros for High-level operations.
    783   //
    784 
          // Takes a register and an Immediate.
          // NOTE(review): presumably adds `imm` to `reg`; which instruction is selected
          // is decided in the definition, not visible here.
    785   void AddImmediate(CpuRegister reg, const Immediate& imm);
    786 
          // Takes a destination XMM register and a double value.
          // NOTE(review): presumably materialises `value` in `dst`; how the constant is
          // produced is not visible in this header.
    787   void LoadDoubleConstant(XmmRegister dst, double value);
    788 
          // Convenience wrapper: calls lock(), then cmpxchgl(address, reg) on the assembler
          // pointer that lock() returns, so the two are emitted back to back.
    789   void LockCmpxchgl(const Address& address, CpuRegister reg) {
    790     lock()->cmpxchgl(address, reg);
    791   }
    792 
          // Convenience wrapper: calls lock(), then cmpxchgq(address, reg) on the assembler
          // pointer that lock() returns, so the two are emitted back to back.
    793   void LockCmpxchgq(const Address& address, CpuRegister reg) {
    794     lock()->cmpxchgq(address, reg);
    795   }
    796 
    797   //
    798   // Misc. functionality
    799   //
          // Returns the constant 16. NOTE(review): presumably a byte alignment for loop
          // headers, to be passed to Align() -- the unit is not stated here; confirm.
    800   int PreferredLoopAlignment() { return 16; }
          // Takes an alignment and an offset, both plain ints.
          // NOTE(review): presumably pads the buffer so that `offset` plus the current
          // position is a multiple of `alignment`; defined outside this header -- confirm.
    801   void Align(int alignment, int offset);
          // Overrides the base-class Bind for a Label.
    802   void Bind(Label* label) OVERRIDE;
          // Base-class override: forwards to the Label* overload of jmp().
    803   void Jump(Label* label) OVERRIDE {
    804     jmp(label);
    805   }
          // Non-virtual overload of Bind for NearLabel (no OVERRIDE, unlike the Label* one).
    806   void Bind(NearLabel* label);
    807 
    808   // Add a double to the constant area, returning the offset into
    809   // the constant area where the literal resides.
          // Thin forwarder: the result of constant_area_.AddDouble(v) is returned unchanged.
    810   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
    811 
    812   // Add a float to the constant area, returning the offset into
    813   // the constant area where the literal resides.
          // Thin forwarder: the result of constant_area_.AddFloat(v) is returned unchanged.
    814   size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
    815 
    816   // Add an int32_t to the constant area, returning the offset into
    817   // the constant area where the literal resides.
          // Thin forwarder to constant_area_.AddInt32(v).
          // NOTE(review): how this differs from AppendInt32 (e.g. whether an existing
          // entry may be reused) is decided inside ConstantArea and is not visible here.
    818   size_t AddInt32(int32_t v) {
    819     return constant_area_.AddInt32(v);
    820   }
    821 
    822   // Add an int32_t to the end of the constant area, returning the offset into
    823   // the constant area where the literal resides.
          // Thin forwarder to constant_area_.AppendInt32(v).
    824   size_t AppendInt32(int32_t v) {
    825     return constant_area_.AppendInt32(v);
    826   }
    827 
    828   // Add an int64_t to the constant area, returning the offset into
    829   // the constant area where the literal resides.
          // Thin forwarder: the result of constant_area_.AddInt64(v) is returned unchanged.
    830   size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
    831 
    832   // Add the contents of the constant area to the assembler buffer.
          // Declared here only; the definition is not part of this header.
    833   void AddConstantArea();
    834 
    835   // Is the constant area empty? Return true if there are no literals in the constant area.
          // Implemented as constant_area_.GetSize() == 0.
    836   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
    837 
    838   // Return the current size of the constant area.
          // The value is whatever constant_area_.GetSize() reports; its unit is defined by
          // ConstantArea, which is declared outside this chunk.
    839   size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
    840 
    841   //
    842   // Heap poisoning.
    843   //
    844 
    845   // Poison a heap reference contained in `reg`.
          // Unconditional: always issues negl(reg), the same call UnpoisonHeapReference
          // makes. Use MaybePoisonHeapReference to honour kPoisonHeapReferences.
    846   void PoisonHeapReference(CpuRegister reg) { negl(reg); }
    847   // Unpoison a heap reference contained in `reg`.
          // Unconditional: always issues negl(reg), the same call PoisonHeapReference
          // makes. Use MaybeUnpoisonHeapReference to honour kPoisonHeapReferences.
    848   void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
    849   // Poison a heap reference contained in `reg` if heap poisoning is enabled.
          // "Enabled" means kPoisonHeapReferences is true; otherwise nothing is emitted.
    850   void MaybePoisonHeapReference(CpuRegister reg) {
    851     if (kPoisonHeapReferences) {
    852       PoisonHeapReference(reg);
    853     }
    854   }
    855   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
          // "Enabled" means kPoisonHeapReferences is true; otherwise nothing is emitted.
    856   void MaybeUnpoisonHeapReference(CpuRegister reg) {
    857     if (kPoisonHeapReferences) {
    858       UnpoisonHeapReference(reg);
    859     }
    860   }
    861 
    862  private:
          // Low-level emission helpers. The first seven are defined inline after the
          // class, at the end of this header; the remaining ones are only declared here.
    863   void EmitUint8(uint8_t value);
    864   void EmitInt32(int32_t value);
    865   void EmitInt64(int64_t value);
    866   void EmitRegisterOperand(uint8_t rm, uint8_t reg);
    867   void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
    868   void EmitFixup(AssemblerFixup* fixup);
    869   void EmitOperandSizeOverride();
    870 
          // Operand, immediate and label emission (declarations only).
    871   void EmitOperand(uint8_t rm, const Operand& operand);
    872   void EmitImmediate(const Immediate& imm);
    873   void EmitComplex(uint8_t rm, const Operand& operand, const Immediate& immediate);
    874   void EmitLabel(Label* label, int instruction_size);
    875   void EmitLabelLink(Label* label);
    876   void EmitLabelLink(NearLabel* label);
    877 
          // Shared helpers with the same two operand shapes as the public shift/rotate
          // declarations (Immediate count, or count in a `shifter` register).
          // NOTE(review): presumably `wide` selects the 64-bit form and `rm` the opcode
          // extension -- confirm against the definitions.
    878   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
    879   void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
    880 
    881   // If any input is not false, output the necessary rex prefix.
    882   void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
    883 
    884   // Emit a rex prefix byte if necessary for reg, i.e. if reg is a register in the range R8 to R15.
          // Overloads cover the CpuRegister / XmmRegister / Operand operand combinations.
    885   void EmitOptionalRex32(CpuRegister reg);
    886   void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
    887   void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
    888   void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
    889   void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
    890   void EmitOptionalRex32(const Operand& operand);
    891   void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
    892   void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
    893 
    894   // Emit a REX.W prefix plus necessary register bit encodings.
          // Overloads take no operand, one register, one Operand, or a dst/src pair drawn
          // from CpuRegister, XmmRegister and Operand.
    895   void EmitRex64();
    896   void EmitRex64(CpuRegister reg);
    897   void EmitRex64(const Operand& operand);
    898   void EmitRex64(CpuRegister dst, CpuRegister src);
    899   void EmitRex64(CpuRegister dst, const Operand& operand);
    900   void EmitRex64(XmmRegister dst, const Operand& operand);
    901   void EmitRex64(XmmRegister dst, CpuRegister src);
    902   void EmitRex64(CpuRegister dst, XmmRegister src);
    903 
    904   // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
          // NOTE(review): presumably needed because, without a REX prefix, byte-register
          // encodings 4-7 name AH/CH/DH/BH rather than SPL/BPL/SIL/DIL -- confirm in the
          // definitions.
    905   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
    906   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
    907 
          // Backing store for the literals registered through AddDouble/AddFloat/AddInt32/
          // AppendInt32/AddInt64 and queried by IsConstantAreaEmpty/ConstantAreaSize.
    908   ConstantArea constant_area_;
    909 
          // NOTE(review): macro presumably comes from base/macros.h and disables copy
          // construction and copy assignment for the assembler.
    910   DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
    911 };
    912 
        // Appends a single byte to the assembler buffer via buffer_.Emit<uint8_t>.
    913 inline void X86_64Assembler::EmitUint8(uint8_t value) {
    914   buffer_.Emit<uint8_t>(value);
    915 }
    916 
        // Appends a 32-bit value to the assembler buffer via buffer_.Emit<int32_t>.
    917 inline void X86_64Assembler::EmitInt32(int32_t value) {
    918   buffer_.Emit<int32_t>(value);
    919 }
    920 
        // Appends a 64-bit value to the assembler buffer as two EmitInt32 calls:
        // Low32Bits(value) first, then High32Bits(value).
    921 inline void X86_64Assembler::EmitInt64(int64_t value) {
    922   // Write this 64-bit value as two 32-bit words for alignment reasons
    923   // (this is essential when running on ARM, which does not allow
    924   // 64-bit unaligned accesses).  We assume little-endianness here.
    925   EmitInt32(Low32Bits(value));
    926   EmitInt32(High32Bits(value));
    927 }
    928 
        // Emits one byte built from two register numbers: the top two bits are set (0xC0),
        // `rm` occupies bits 5:3 and the low three bits of `reg` occupy bits 2:0.
        // `rm` must be below 8; `reg` is masked with 7, so higher register numbers are
        // accepted and only their low three bits are encoded here.
        // NOTE(review): in Intel ModRM terms this is mod=0b11 with `rm` landing in the
        // reg/opcode field and `reg` in the r/m field, i.e. the parameter names are swapped
        // relative to the field names -- confirm before renaming anything.
    929 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
          // rm is a uint8_t, so this lower-bound check can never fail.
    930   CHECK_GE(rm, 0);
    931   CHECK_LT(rm, 8);
          // With rm < 8 the shifted field cannot overlap 0xC0 | (reg & 7), so `+` acts as `|`.
    932   buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
    933 }
    934 
        // XMM variant of EmitRegisterOperand: converts `reg` with AsFloatRegister(), casts
        // the result to uint8_t and forwards it together with `rm`.
    935 inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
    936   EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
    937 }
    938 
        // Hands `fixup` to the assembler buffer via buffer_.EmitFixup.
        // NOTE(review): ownership and lifetime of `fixup` are determined by the buffer's
        // EmitFixup, which is declared outside this file -- confirm before changing callers.
    939 inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
    940   buffer_.EmitFixup(fixup);
    941 }
    942 
        // Emits the single byte 0x66, the x86 operand-size override prefix.
    943 inline void X86_64Assembler::EmitOperandSizeOverride() {
    944   EmitUint8(0x66);
    945 }
    946 
    947 }  // namespace x86_64
    948 }  // namespace art
    949 
    950 #endif  // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
    951