Home | History | Annotate | Download | only in x64
      1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
      2 // All Rights Reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 // - Redistributions of source code must retain the above copyright notice,
      9 // this list of conditions and the following disclaimer.
     10 //
     11 // - Redistribution in binary form must reproduce the above copyright
     12 // notice, this list of conditions and the following disclaimer in the
     13 // documentation and/or other materials provided with the distribution.
     14 //
     15 // - Neither the name of Sun Microsystems or the names of contributors may
     16 // be used to endorse or promote products derived from this software without
     17 // specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
     20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
     21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
     23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // The original source code covered by the above license above has been
     32 // modified significantly by Google Inc.
     33 // Copyright 2011 the V8 project authors. All rights reserved.
     34 
     35 // A lightweight X64 Assembler.
     36 
     37 #ifndef V8_X64_ASSEMBLER_X64_H_
     38 #define V8_X64_ASSEMBLER_X64_H_
     39 
     40 #include "serialize.h"
     41 
     42 namespace v8 {
     43 namespace internal {
     44 
     45 // Utility functions
     46 
     47 // Test whether a 64-bit value is in a specific range.
     48 static inline bool is_uint32(int64_t x) {
     49   static const uint64_t kMaxUInt32 = V8_UINT64_C(0xffffffff);
     50   return static_cast<uint64_t>(x) <= kMaxUInt32;
     51 }
     52 
     53 static inline bool is_int32(int64_t x) {
     54   static const int64_t kMinInt32 = -V8_INT64_C(0x80000000);
     55   return is_uint32(x - kMinInt32);
     56 }
     57 
     58 static inline bool uint_is_int32(uint64_t x) {
     59   static const uint64_t kMaxInt32 = V8_UINT64_C(0x7fffffff);
     60   return x <= kMaxInt32;
     61 }
     62 
     63 static inline bool is_uint32(uint64_t x) {
     64   static const uint64_t kMaxUInt32 = V8_UINT64_C(0xffffffff);
     65   return x <= kMaxUInt32;
     66 }
     67 
     68 // CPU Registers.
     69 //
     70 // 1) We would prefer to use an enum, but enum values are assignment-
     71 // compatible with int, which has caused code-generation bugs.
     72 //
     73 // 2) We would prefer to use a class instead of a struct but we don't like
     74 // the register initialization to depend on the particular initialization
     75 // order (which appears to be different on OS X, Linux, and Windows for the
     76 // installed versions of C++ we tried). Using a struct permits C-style
     77 // "initialization". Also, the Register objects cannot be const as this
     78 // forces initialization stubs in MSVC, making us dependent on initialization
     79 // order.
     80 //
     81 // 3) By not using an enum, we are possibly preventing the compiler from
     82 // doing certain constant folds, which may significantly reduce the
     83 // code generated for some assembly instructions (because they boil down
     84 // to a few constants). If this is a problem, we could change the code
     85 // such that we use an enum in optimized mode, and the struct in debug
     86 // mode. This way we get the compile-time error checking in debug mode
     87 // and best performance in optimized code.
     88 //
     89 
     90 struct Register {
     91   // The non-allocatable registers are:
     92   //  rsp - stack pointer
     93   //  rbp - frame pointer
     94   //  rsi - context register
     95   //  r10 - fixed scratch register
     96   //  r12 - smi constant register
     97   //  r13 - root register
     98   static const int kNumRegisters = 16;
     99   static const int kNumAllocatableRegisters = 10;
    100 
    101   static int ToAllocationIndex(Register reg) {
    102     return kAllocationIndexByRegisterCode[reg.code()];
    103   }
    104 
    105   static Register FromAllocationIndex(int index) {
    106     ASSERT(index >= 0 && index < kNumAllocatableRegisters);
    107     Register result = { kRegisterCodeByAllocationIndex[index] };
    108     return result;
    109   }
    110 
    111   static const char* AllocationIndexToString(int index) {
    112     ASSERT(index >= 0 && index < kNumAllocatableRegisters);
    113     const char* const names[] = {
    114       "rax",
    115       "rbx",
    116       "rdx",
    117       "rcx",
    118       "rdi",
    119       "r8",
    120       "r9",
    121       "r11",
    122       "r14",
    123       "r15"
    124     };
    125     return names[index];
    126   }
    127 
    128   static Register toRegister(int code) {
    129     Register r = { code };
    130     return r;
    131   }
    132   bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
    133   bool is(Register reg) const { return code_ == reg.code_; }
    134   int code() const {
    135     ASSERT(is_valid());
    136     return code_;
    137   }
    138   int bit() const {
    139     return 1 << code_;
    140   }
    141 
    142   // Return the high bit of the register code as a 0 or 1.  Used often
    143   // when constructing the REX prefix byte.
    144   int high_bit() const {
    145     return code_ >> 3;
    146   }
    147   // Return the 3 low bits of the register code.  Used when encoding registers
    148   // in modR/M, SIB, and opcode bytes.
    149   int low_bits() const {
    150     return code_ & 0x7;
    151   }
    152 
    153   // Unfortunately we can't make this private in a struct when initializing
    154   // by assignment.
    155   int code_;
    156 
    157  private:
    158   static const int kRegisterCodeByAllocationIndex[kNumAllocatableRegisters];
    159   static const int kAllocationIndexByRegisterCode[kNumRegisters];
    160 };
    161 
// One constant per general-purpose register.  The brace initializer sets
// Register::code_, the value returned by Register::code() and split by
// high_bit() / low_bits().
const Register rax = { 0 };
const Register rcx = { 1 };
const Register rdx = { 2 };
const Register rbx = { 3 };
const Register rsp = { 4 };
const Register rbp = { 5 };
const Register rsi = { 6 };
const Register rdi = { 7 };
const Register r8 = { 8 };
const Register r9 = { 9 };
const Register r10 = { 10 };
const Register r11 = { 11 };
const Register r12 = { 12 };
const Register r13 = { 13 };
const Register r14 = { 14 };
const Register r15 = { 15 };
// Sentinel for "no register": code -1 makes Register::is_valid() false.
const Register no_reg = { -1 };
    179 
    180 
    181 struct XMMRegister {
    182   static const int kNumRegisters = 16;
    183   static const int kNumAllocatableRegisters = 15;
    184 
    185   static int ToAllocationIndex(XMMRegister reg) {
    186     ASSERT(reg.code() != 0);
    187     return reg.code() - 1;
    188   }
    189 
    190   static XMMRegister FromAllocationIndex(int index) {
    191     ASSERT(0 <= index && index < kNumAllocatableRegisters);
    192     XMMRegister result = { index + 1 };
    193     return result;
    194   }
    195 
    196   static const char* AllocationIndexToString(int index) {
    197     ASSERT(index >= 0 && index < kNumAllocatableRegisters);
    198     const char* const names[] = {
    199       "xmm1",
    200       "xmm2",
    201       "xmm3",
    202       "xmm4",
    203       "xmm5",
    204       "xmm6",
    205       "xmm7",
    206       "xmm8",
    207       "xmm9",
    208       "xmm10",
    209       "xmm11",
    210       "xmm12",
    211       "xmm13",
    212       "xmm14",
    213       "xmm15"
    214     };
    215     return names[index];
    216   }
    217 
    218   bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
    219   bool is(XMMRegister reg) const { return code_ == reg.code_; }
    220   int code() const {
    221     ASSERT(is_valid());
    222     return code_;
    223   }
    224 
    225   // Return the high bit of the register code as a 0 or 1.  Used often
    226   // when constructing the REX prefix byte.
    227   int high_bit() const {
    228     return code_ >> 3;
    229   }
    230   // Return the 3 low bits of the register code.  Used when encoding registers
    231   // in modR/M, SIB, and opcode bytes.
    232   int low_bits() const {
    233     return code_ & 0x7;
    234   }
    235 
    236   int code_;
    237 };
    238 
// One constant per XMM register; the brace initializer sets
// XMMRegister::code_.  xmm0 (code 0) is the only one without an allocation
// index, see XMMRegister::ToAllocationIndex.
const XMMRegister xmm0 = { 0 };
const XMMRegister xmm1 = { 1 };
const XMMRegister xmm2 = { 2 };
const XMMRegister xmm3 = { 3 };
const XMMRegister xmm4 = { 4 };
const XMMRegister xmm5 = { 5 };
const XMMRegister xmm6 = { 6 };
const XMMRegister xmm7 = { 7 };
const XMMRegister xmm8 = { 8 };
const XMMRegister xmm9 = { 9 };
const XMMRegister xmm10 = { 10 };
const XMMRegister xmm11 = { 11 };
const XMMRegister xmm12 = { 12 };
const XMMRegister xmm13 = { 13 };
const XMMRegister xmm14 = { 14 };
const XMMRegister xmm15 = { 15 };
    255 
    256 
    257 typedef XMMRegister DoubleRegister;
    258 
    259 
// Condition codes for conditional instructions.
//
// The numeric values are part of the contract:
//  - they are OR-ed into opcode bytes (see Assembler::kJncShortOpcode and
//    Assembler::kJcShortOpcode), and
//  - NegateCondition() flips bit 0, so every condition and its negation must
//    differ only in that bit.
enum Condition {
  // any value < 0 is considered no_condition
  no_condition  = -1,

  overflow      =  0,
  no_overflow   =  1,
  below         =  2,
  above_equal   =  3,
  equal         =  4,
  not_equal     =  5,
  below_equal   =  6,
  above         =  7,
  negative      =  8,
  positive      =  9,
  parity_even   = 10,
  parity_odd    = 11,
  less          = 12,
  greater_equal = 13,
  less_equal    = 14,
  greater       = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always        = 16,
  never         = 17,
  // aliases
  carry         = below,
  not_carry     = above_equal,
  zero          = equal,
  not_zero      = not_equal,
  sign          = negative,
  not_sign      = positive,
  // Highest-numbered real condition; always/never lie above it.
  last_condition = greater
};
    294 
    295 
    296 // Returns the equivalent of !cc.
    297 // Negation of the default no_condition (-1) results in a non-default
    298 // no_condition value (-2). As long as tests for no_condition check
    299 // for condition < 0, this will work as expected.
    300 inline Condition NegateCondition(Condition cc) {
    301   return static_cast<Condition>(cc ^ 1);
    302 }
    303 
    304 
    305 // Corresponds to transposing the operands of a comparison.
    306 inline Condition ReverseCondition(Condition cc) {
    307   switch (cc) {
    308     case below:
    309       return above;
    310     case above:
    311       return below;
    312     case above_equal:
    313       return below_equal;
    314     case below_equal:
    315       return above_equal;
    316     case less:
    317       return greater;
    318     case greater:
    319       return less;
    320     case greater_equal:
    321       return less_equal;
    322     case less_equal:
    323       return greater_equal;
    324     default:
    325       return cc;
    326   };
    327 }
    328 
    329 
// Static branch-prediction hint attached to a conditional jump.
// NOTE(review): 0x2e / 0x3e look like the x86 branch-hint prefix bytes;
// confirm against the emitter before relying on that.
enum Hint {
  no_hint = 0,       // No hint is attached.
  not_taken = 0x2e,  // Branch is expected to fall through.
  taken = 0x3e       // Branch is expected to be taken.
};
    335 
    336 // The result of negating a hint is as if the corresponding condition
    337 // were negated by NegateCondition.  That is, no_hint is mapped to
    338 // itself and not_taken and taken are mapped to each other.
    339 inline Hint NegateHint(Hint hint) {
    340   return (hint == no_hint)
    341       ? no_hint
    342       : ((hint == not_taken) ? taken : not_taken);
    343 }
    344 
    345 
    346 // -----------------------------------------------------------------------------
    347 // Machine instruction Immediates
    348 
// Wraps a 32-bit immediate value for use as an instruction operand.  The
// constructor is explicit, so a plain integer never converts to an Immediate
// implicitly.
class Immediate BASE_EMBEDDED {
 public:
  explicit Immediate(int32_t value) : value_(value) {}

 private:
  // The wrapped value; only Assembler (a friend) can read it.
  int32_t value_;

  friend class Assembler;
};
    358 
    359 
    360 // -----------------------------------------------------------------------------
    361 // Machine instruction Operands
    362 
// Multiplier applied to the index register of a memory operand.  Each
// enumerator's value is log2 of the multiplier it names (times_8 == 3).
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  // Convenience aliases: a 4-byte int and an 8-byte pointer.
  times_int_size = times_4,
  times_pointer_size = times_8
};
    371 
    372 
// Describes a memory operand of an instruction.  The encoding is kept in
// rex_ and buf_ (len_ bytes in use); the register in the ModR/M 'reg' field
// is not stored here and is added when the operand is emitted.
class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

 private:
  // REX prefix contribution of this operand; zero means no REX prefix is
  // required on its account (see requires_rex()).
  byte rex_;
  // Encoded operand bytes.
  // NOTE(review): 6 presumably covers ModR/M + SIB + a 32-bit displacement;
  // confirm against the set_* implementations.
  byte buf_[6];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);

  friend class Assembler;
};
    426 
    427 
    428 // CpuFeatures keeps track of which features are supported by the target CPU.
    429 // Supported features must be enabled by a Scope before use.
    430 // Example:
    431 //   if (CpuFeatures::IsSupported(SSE3)) {
    432 //     CpuFeatures::Scope fscope(SSE3);
    433 //     // Generate SSE3 floating point code.
    434 //   } else {
    435 //     // Generate standard x87 or SSE2 floating point code.
    436 //   }
    437 class CpuFeatures : public AllStatic {
    438  public:
    439   // Detect features of the target CPU. Set safe defaults if the serializer
    440   // is enabled (snapshots must be portable).
    441   static void Probe();
    442 
    443   // Check whether a feature is supported by the target CPU.
    444   static bool IsSupported(CpuFeature f) {
    445     ASSERT(initialized_);
    446     if (f == SSE2 && !FLAG_enable_sse2) return false;
    447     if (f == SSE3 && !FLAG_enable_sse3) return false;
    448     if (f == CMOV && !FLAG_enable_cmov) return false;
    449     if (f == RDTSC && !FLAG_enable_rdtsc) return false;
    450     if (f == SAHF && !FLAG_enable_sahf) return false;
    451     return (supported_ & (V8_UINT64_C(1) << f)) != 0;
    452   }
    453 
    454 #ifdef DEBUG
    455   // Check whether a feature is currently enabled.
    456   static bool IsEnabled(CpuFeature f) {
    457     ASSERT(initialized_);
    458     Isolate* isolate = Isolate::UncheckedCurrent();
    459     if (isolate == NULL) {
    460       // When no isolate is available, work as if we're running in
    461       // release mode.
    462       return IsSupported(f);
    463     }
    464     uint64_t enabled = isolate->enabled_cpu_features();
    465     return (enabled & (V8_UINT64_C(1) << f)) != 0;
    466   }
    467 #endif
    468 
    469   // Enable a specified feature within a scope.
    470   class Scope BASE_EMBEDDED {
    471 #ifdef DEBUG
    472    public:
    473     explicit Scope(CpuFeature f) {
    474       uint64_t mask = V8_UINT64_C(1) << f;
    475       ASSERT(CpuFeatures::IsSupported(f));
    476       ASSERT(!Serializer::enabled() ||
    477              (CpuFeatures::found_by_runtime_probing_ & mask) == 0);
    478       isolate_ = Isolate::UncheckedCurrent();
    479       old_enabled_ = 0;
    480       if (isolate_ != NULL) {
    481         old_enabled_ = isolate_->enabled_cpu_features();
    482         isolate_->set_enabled_cpu_features(old_enabled_ | mask);
    483       }
    484     }
    485     ~Scope() {
    486       ASSERT_EQ(Isolate::UncheckedCurrent(), isolate_);
    487       if (isolate_ != NULL) {
    488         isolate_->set_enabled_cpu_features(old_enabled_);
    489       }
    490     }
    491    private:
    492     Isolate* isolate_;
    493     uint64_t old_enabled_;
    494 #else
    495    public:
    496     explicit Scope(CpuFeature f) {}
    497 #endif
    498   };
    499 
    500  private:
    501   // Safe defaults include SSE2 and CMOV for X64. It is always available, if
    502   // anyone checks, but they shouldn't need to check.
    503   // The required user mode extensions in X64 are (from AMD64 ABI Table A.1):
    504   //   fpu, tsc, cx8, cmov, mmx, sse, sse2, fxsr, syscall
    505   static const uint64_t kDefaultCpuFeatures = (1 << SSE2 | 1 << CMOV);
    506 
    507 #ifdef DEBUG
    508   static bool initialized_;
    509 #endif
    510   static uint64_t supported_;
    511   static uint64_t found_by_runtime_probing_;
    512 
    513   DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
    514 };
    515 
    516 
    517 class Assembler : public AssemblerBase {
    518  private:
    519   // We check before assembling an instruction that there is sufficient
    520   // space to write an instruction and its relocation information.
    521   // The relocation writer's position must be kGap bytes above the end of
    522   // the generated instructions. This leaves enough space for the
    523   // longest possible x64 instruction, 15 bytes, and the longest possible
    524   // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
    525   // (There is a 15 byte limit on x64 instruction length that rules out some
    526   // otherwise valid instructions.)
    527   // This allows for a single, fast space check per instruction.
    528   static const int kGap = 32;
    529 
    530  public:
    531   // Create an assembler. Instructions and relocation information are emitted
    532   // into a buffer, with the instructions starting from the beginning and the
    533   // relocation information starting from the end of the buffer. See CodeDesc
    534   // for a detailed comment on the layout (globals.h).
    535   //
    536   // If the provided buffer is NULL, the assembler allocates and grows its own
    537   // buffer, and buffer_size determines the initial buffer size. The buffer is
    538   // owned by the assembler and deallocated upon destruction of the assembler.
    539   //
    540   // If the provided buffer is not NULL, the assembler uses the provided buffer
    541   // for code generation and assumes its size to be buffer_size. If the buffer
    542   // is too small, a fatal error occurs. No deallocation of the buffer is done
    543   // upon destruction of the assembler.
    544   Assembler(Isolate* isolate, void* buffer, int buffer_size);
    545   ~Assembler();
    546 
    547   // Overrides the default provided by FLAG_debug_code.
  // Stores |value| in emit_debug_code_.
  void set_emit_debug_code(bool value) { emit_debug_code_ = value; }
    549 
    550   // GetCode emits any pending (non-emitted) code and fills the descriptor
    551   // desc. GetCode() is idempotent; it returns the same result if no other
    552   // Assembler functions are invoked in between GetCode() calls.
    553   void GetCode(CodeDesc* desc);
    554 
    555   // Read/Modify the code target in the relative branch/call instruction at pc.
    556   // On the x64 architecture, we use relative jumps with a 32-bit displacement
    557   // to jump to other Code objects in the Code space in the heap.
    558   // Jumps to C functions are done indirectly through a 64-bit register holding
    559   // the absolute address of the target.
    560   // These functions convert between absolute Addresses of Code objects and
    561   // the relative displacements stored in the code.
    562   static inline Address target_address_at(Address pc);
    563   static inline void set_target_address_at(Address pc, Address target);
    564 
    565   // This sets the branch destination (which is in the instruction on x64).
    566   // This is for calls and branches within generated code.
  inline static void set_target_at(Address instruction_payload,
                                   Address target) {
    // Pure forwarder: the payload address is passed on as the pc argument of
    // set_target_address_at.
    set_target_address_at(instruction_payload, target);
  }
    571 
    572   // This sets the branch destination (which is a load instruction on x64).
    573   // This is for calls and branches to runtime code.
    574   inline static void set_external_target_at(Address instruction_payload,
    575                                             Address target) {
    576     *reinterpret_cast<Address*>(instruction_payload) = target;
    577   }
    578 
    579   inline Handle<Object> code_target_object_handle_at(Address pc);
    580   // Number of bytes taken up by the branch target in the code.
    581   static const int kCallTargetSize = 4;      // Use 32-bit displacement.
    582   static const int kExternalTargetSize = 8;  // Use 64-bit absolute.
    583   // Distance between the address of the code target in the call instruction
    584   // and the return address pushed on the stack.
    585   static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
    586   // Distance between the start of the JS return sequence and where the
    587   // 32-bit displacement of a near call would be, relative to the pushed
    588   // return address.  TODO: Use return sequence length instead.
    589   // Should equal Debug::kX64JSReturnSequenceLength - kCallTargetAddressOffset;
    590   static const int kPatchReturnSequenceAddressOffset = 13 - 4;
    591   // Distance between start of patched debug break slot and where the
    592   // 32-bit displacement of a near call would be, relative to the pushed
    593   // return address.  TODO: Use return sequence length instead.
    594   // Should equal Debug::kX64JSReturnSequenceLength - kCallTargetAddressOffset;
    595   static const int kPatchDebugBreakSlotAddressOffset = 13 - 4;
    596   // TODO(X64): Rename this, removing the "Real", after changing the above.
    597   static const int kRealPatchReturnSequenceAddressOffset = 2;
    598 
    599   // Some x64 JS code is padded with int3 to make it large
    600   // enough to hold an instruction when the debugger patches it.
    601   static const int kJumpInstructionLength = 13;
    602   static const int kCallInstructionLength = 13;
    603   static const int kJSReturnSequenceLength = 13;
    604   static const int kShortCallInstructionLength = 5;
    605 
    606   // The debug break slot must be able to contain a call instruction.
    607   static const int kDebugBreakSlotLength = kCallInstructionLength;
    608 
    609   // One byte opcode for test eax,0xXXXXXXXX.
    610   static const byte kTestEaxByte = 0xA9;
    611   // One byte opcode for test al, 0xXX.
    612   static const byte kTestAlByte = 0xA8;
    613   // One byte opcode for nop.
    614   static const byte kNopByte = 0x90;
    615 
    616   // One byte prefix for a short conditional jump.
    617   static const byte kJccShortPrefix = 0x70;
    618   static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
    619   static const byte kJcShortOpcode = kJccShortPrefix | carry;
    620 
    621 
    622 
    623   // ---------------------------------------------------------------------------
    624   // Code generation
    625   //
    626   // Function names correspond one-to-one to x64 instruction mnemonics.
    627   // Unless specified otherwise, instructions operate on 64-bit operands.
    628   //
    629   // If we need versions of an assembly instruction that operate on different
    630   // width arguments, we add a single-letter suffix specifying the width.
    631   // This is done for the following instructions: mov, cmp, inc, dec,
    632   // add, sub, and test.
    633   // There are no versions of these instructions without the suffix.
    634   // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
    635   // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
    636   // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
    637   // - Instructions on 64-bit (quadword) operands/registers use 'q'.
    638   //
    639   // Some mnemonics, such as "and", are the same as C++ keywords.
    640   // Naming conflicts with C++ keywords are resolved by adding a trailing '_'.
    641 
    642   // Insert the smallest number of nop instructions
    643   // possible to align the pc offset to a multiple
    644   // of m, where m must be a power of 2.
    645   void Align(int m);
    646   // Aligns code to something that's optimal for a jump target for the platform.
    647   void CodeTargetAlign();
    648 
    649   // Stack
    650   void pushfq();
    651   void popfq();
    652 
    653   void push(Immediate value);
    654   // Push a 32 bit integer, and guarantee that it is actually pushed as a
    655   // 32 bit value, the normal push will optimize the 8 bit case.
    656   void push_imm32(int32_t imm32);
    657   void push(Register src);
    658   void push(const Operand& src);
    659 
    660   void pop(Register dst);
    661   void pop(const Operand& dst);
    662 
    663   void enter(Immediate size);
    664   void leave();
    665 
    666   // Moves
    667   void movb(Register dst, const Operand& src);
    668   void movb(Register dst, Immediate imm);
    669   void movb(const Operand& dst, Register src);
    670 
    671   // Move the low 16 bits of a 64-bit register value to a 16-bit
    672   // memory location.
    673   void movw(const Operand& dst, Register src);
    674 
    675   void movl(Register dst, Register src);
    676   void movl(Register dst, const Operand& src);
    677   void movl(const Operand& dst, Register src);
    678   void movl(const Operand& dst, Immediate imm);
    679   // Load a 32-bit immediate value, zero-extended to 64 bits.
    680   void movl(Register dst, Immediate imm32);
    681 
    682   // Move 64 bit register value to 64-bit memory location.
    683   void movq(const Operand& dst, Register src);
    684   // Move 64 bit memory location to 64-bit register value.
    685   void movq(Register dst, const Operand& src);
    686   void movq(Register dst, Register src);
    687   // Sign extends immediate 32-bit value to 64 bits.
    688   void movq(Register dst, Immediate x);
    689   // Move the offset of the label location relative to the current
    690   // position (after the move) to the destination.
    691   void movl(const Operand& dst, Label* src);
    692 
    693   // Move sign extended immediate to memory location.
    694   void movq(const Operand& dst, Immediate value);
    695   // Instructions to load a 64-bit immediate into a register.
    696   // All 64-bit immediates must have a relocation mode.
    697   void movq(Register dst, void* ptr, RelocInfo::Mode rmode);
    698   void movq(Register dst, int64_t value, RelocInfo::Mode rmode);
    699   void movq(Register dst, const char* s, RelocInfo::Mode rmode);
    700   // Moves the address of the external reference into the register.
    701   void movq(Register dst, ExternalReference ext);
    702   void movq(Register dst, Handle<Object> handle, RelocInfo::Mode rmode);
    703 
    704   void movsxbq(Register dst, const Operand& src);
    705   void movsxwq(Register dst, const Operand& src);
    706   void movsxlq(Register dst, Register src);
    707   void movsxlq(Register dst, const Operand& src);
    708   void movzxbq(Register dst, const Operand& src);
    709   void movzxbl(Register dst, const Operand& src);
    710   void movzxwq(Register dst, const Operand& src);
    711   void movzxwl(Register dst, const Operand& src);
    712 
    713   // Repeated moves.
    714 
    715   void repmovsb();
    716   void repmovsw();
    717   void repmovsl();
    718   void repmovsq();
    719 
    720   // Instruction to load from an immediate 64-bit pointer into RAX.
    721   void load_rax(void* ptr, RelocInfo::Mode rmode);
    722   void load_rax(ExternalReference ext);
    723 
    724   // Conditional moves.
    725   void cmovq(Condition cc, Register dst, Register src);
    726   void cmovq(Condition cc, Register dst, const Operand& src);
    727   void cmovl(Condition cc, Register dst, Register src);
    728   void cmovl(Condition cc, Register dst, const Operand& src);
    729 
    730   // Exchange two registers
    731   void xchg(Register dst, Register src);
    732 
    733   // Arithmetics
  // 32-bit add, register += register: opcode 0x03 through the 32-bit emitter.
  void addl(Register dst, Register src) {
    arithmetic_op_32(0x03, dst, src);
  }

  // 32-bit add, register += immediate; 0x0 is the subcode handed to the
  // immediate-arithmetic emitter.
  void addl(Register dst, Immediate src) {
    immediate_arithmetic_op_32(0x0, dst, src);
  }

  // 32-bit add, register += memory operand.
  void addl(Register dst, const Operand& src) {
    arithmetic_op_32(0x03, dst, src);
  }

  // 32-bit add, memory operand += immediate.
  void addl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x0, dst, src);
  }
    749 
  // 64-bit add, register += register: opcode 0x03 through arithmetic_op.
  void addq(Register dst, Register src) {
    arithmetic_op(0x03, dst, src);
  }

  // 64-bit add, register += memory operand.
  void addq(Register dst, const Operand& src) {
    arithmetic_op(0x03, dst, src);
  }

  // 64-bit add, memory operand += register.  Uses opcode 0x01 and passes the
  // register first, the memory operand second.
  void addq(const Operand& dst, Register src) {
    arithmetic_op(0x01, src, dst);
  }

  // 64-bit add, register += immediate (subcode 0x0).
  void addq(Register dst, Immediate src) {
    immediate_arithmetic_op(0x0, dst, src);
  }

  // 64-bit add, memory operand += immediate (subcode 0x0).
  void addq(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x0, dst, src);
  }
    769 
  // 32-bit sbb, register/register: opcode 0x1b through the 32-bit emitter.
  void sbbl(Register dst, Register src) {
    arithmetic_op_32(0x1b, dst, src);
  }

  // 64-bit sbb, register/register: opcode 0x1b through arithmetic_op.
  void sbbq(Register dst, Register src) {
    arithmetic_op(0x1b, dst, src);
  }
    777 
    778   void cmpb(Register dst, Immediate src) {
    779     immediate_arithmetic_op_8(0x7, dst, src);
    780   }
    781 
    782   void cmpb_al(Immediate src);
    783 
    784   void cmpb(Register dst, Register src) {
    785     arithmetic_op(0x3A, dst, src);
    786   }
    787 
    788   void cmpb(Register dst, const Operand& src) {
    789     arithmetic_op(0x3A, dst, src);
    790   }
    791 
    792   void cmpb(const Operand& dst, Register src) {
    793     arithmetic_op(0x38, src, dst);
    794   }
    795 
    796   void cmpb(const Operand& dst, Immediate src) {
    797     immediate_arithmetic_op_8(0x7, dst, src);
    798   }
    799 
    800   void cmpw(const Operand& dst, Immediate src) {
    801     immediate_arithmetic_op_16(0x7, dst, src);
    802   }
    803 
    804   void cmpw(Register dst, Immediate src) {
    805     immediate_arithmetic_op_16(0x7, dst, src);
    806   }
    807 
    808   void cmpw(Register dst, const Operand& src) {
    809     arithmetic_op_16(0x3B, dst, src);
    810   }
    811 
    812   void cmpw(Register dst, Register src) {
    813     arithmetic_op_16(0x3B, dst, src);
    814   }
    815 
    816   void cmpw(const Operand& dst, Register src) {
    817     arithmetic_op_16(0x39, src, dst);
    818   }
    819 
    820   void cmpl(Register dst, Register src) {
    821     arithmetic_op_32(0x3B, dst, src);
    822   }
    823 
    824   void cmpl(Register dst, const Operand& src) {
    825     arithmetic_op_32(0x3B, dst, src);
    826   }
    827 
    828   void cmpl(const Operand& dst, Register src) {
    829     arithmetic_op_32(0x39, src, dst);
    830   }
    831 
    832   void cmpl(Register dst, Immediate src) {
    833     immediate_arithmetic_op_32(0x7, dst, src);
    834   }
    835 
    836   void cmpl(const Operand& dst, Immediate src) {
    837     immediate_arithmetic_op_32(0x7, dst, src);
    838   }
    839 
    840   void cmpq(Register dst, Register src) {
    841     arithmetic_op(0x3B, dst, src);
    842   }
    843 
    844   void cmpq(Register dst, const Operand& src) {
    845     arithmetic_op(0x3B, dst, src);
    846   }
    847 
    848   void cmpq(const Operand& dst, Register src) {
    849     arithmetic_op(0x39, src, dst);
    850   }
    851 
    852   void cmpq(Register dst, Immediate src) {
    853     immediate_arithmetic_op(0x7, dst, src);
    854   }
    855 
    856   void cmpq(const Operand& dst, Immediate src) {
    857     immediate_arithmetic_op(0x7, dst, src);
    858   }
    859 
    860   void and_(Register dst, Register src) {
    861     arithmetic_op(0x23, dst, src);
    862   }
    863 
    864   void and_(Register dst, const Operand& src) {
    865     arithmetic_op(0x23, dst, src);
    866   }
    867 
    868   void and_(const Operand& dst, Register src) {
    869     arithmetic_op(0x21, src, dst);
    870   }
    871 
    872   void and_(Register dst, Immediate src) {
    873     immediate_arithmetic_op(0x4, dst, src);
    874   }
    875 
    876   void and_(const Operand& dst, Immediate src) {
    877     immediate_arithmetic_op(0x4, dst, src);
    878   }
    879 
    880   void andl(Register dst, Immediate src) {
    881     immediate_arithmetic_op_32(0x4, dst, src);
    882   }
    883 
    884   void andl(Register dst, Register src) {
    885     arithmetic_op_32(0x23, dst, src);
    886   }
    887 
    888   void andl(Register dst, const Operand& src) {
    889     arithmetic_op_32(0x23, dst, src);
    890   }
    891 
    892   void andb(Register dst, Immediate src) {
    893     immediate_arithmetic_op_8(0x4, dst, src);
    894   }
    895 
  // Decrement emitters for a register or a memory operand.
  // NOTE(review): the q/l/b suffixes presumably select 64-, 32- and 8-bit
  // operand sizes; the definitions are outside this header section.
  void decq(Register dst);
  void decq(const Operand& dst);
  void decl(Register dst);
  void decl(const Operand& dst);
  void decb(Register dst);
  void decb(const Operand& dst);
    902 
  // Sign-extends rax into rdx:rax.  This produces the double-width dividend
  // layout that idivq below operates on.
  void cqo();
  // Sign-extends eax into edx:eax.  This produces the double-width dividend
  // layout that idivl below operates on.
  void cdq();

  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx.
  void idivq(Register src);
  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, remainder in
  // edx.
  void idivl(Register src);
    912 
  // Signed multiply instructions.
  // NOTE(review): the un-suffixed imul forms are presumably the 64-bit
  // variants, given that the 32-bit ones are spelled imull below -- confirm
  // against the definitions.
  void imul(Register src);                               // rdx:rax = rax * src.
  void imul(Register dst, Register src);                 // dst = dst * src.
  void imul(Register dst, const Operand& src);           // dst = dst * src.
  void imul(Register dst, Register src, Immediate imm);  // dst = src * imm.
  // Signed 32-bit multiply instructions.
  void imull(Register dst, Register src);                 // dst = dst * src.
  void imull(Register dst, const Operand& src);           // dst = dst * src.
  void imull(Register dst, Register src, Immediate imm);  // dst = src * imm.
    922 
  // Increment emitters for a register or a memory operand.
  // NOTE(review): the q/l suffixes presumably select 64- and 32-bit operand
  // sizes; the definitions are outside this header section.
  void incq(Register dst);
  void incq(const Operand& dst);
  void incl(Register dst);
  void incl(const Operand& dst);
    927 
  // Load-effective-address emitters: dst receives the address described by
  // src.  NOTE(review): leal is presumably the 32-bit variant of lea --
  // confirm against the definitions.
  void lea(Register dst, const Operand& src);
  void leal(Register dst, const Operand& src);
    930 
  // Multiply rax by src, put the result in rdx:rax.
  // NOTE(review): presumably the unsigned counterpart of imul(Register) --
  // confirm against the definition.
  void mul(Register src);
    933 
  // Negation emitters for a register or a memory operand.
  // NOTE(review): negl is presumably the 32-bit variant of neg; the
  // definitions are outside this header section.
  void neg(Register dst);
  void neg(const Operand& dst);
  void negl(Register dst);
    937 
  // Bitwise-NOT emitters for a register or a memory operand.
  // NOTE(review): notl is presumably the 32-bit variant of not_; the
  // definitions are outside this header section.
  void not_(Register dst);
  void not_(const Operand& dst);
  void notl(Register dst);
    941 
    942   void or_(Register dst, Register src) {
    943     arithmetic_op(0x0B, dst, src);
    944   }
    945 
    946   void orl(Register dst, Register src) {
    947     arithmetic_op_32(0x0B, dst, src);
    948   }
    949 
    950   void or_(Register dst, const Operand& src) {
    951     arithmetic_op(0x0B, dst, src);
    952   }
    953 
    954   void orl(Register dst, const Operand& src) {
    955     arithmetic_op_32(0x0B, dst, src);
    956   }
    957 
    958   void or_(const Operand& dst, Register src) {
    959     arithmetic_op(0x09, src, dst);
    960   }
    961 
    962   void or_(Register dst, Immediate src) {
    963     immediate_arithmetic_op(0x1, dst, src);
    964   }
    965 
    966   void orl(Register dst, Immediate src) {
    967     immediate_arithmetic_op_32(0x1, dst, src);
    968   }
    969 
    970   void or_(const Operand& dst, Immediate src) {
    971     immediate_arithmetic_op(0x1, dst, src);
    972   }
    973 
    974   void orl(const Operand& dst, Immediate src) {
    975     immediate_arithmetic_op_32(0x1, dst, src);
    976   }
    977 
    978 
    979   void rcl(Register dst, Immediate imm8) {
    980     shift(dst, imm8, 0x2);
    981   }
    982 
    983   void rol(Register dst, Immediate imm8) {
    984     shift(dst, imm8, 0x0);
    985   }
    986 
    987   void rcr(Register dst, Immediate imm8) {
    988     shift(dst, imm8, 0x3);
    989   }
    990 
    991   void ror(Register dst, Immediate imm8) {
    992     shift(dst, imm8, 0x1);
    993   }
    994 
  // Shifts dst:src left by cl bits, affecting only dst.
  // (src supplies the bits shifted in; it is not modified.)
  void shld(Register dst, Register src);

  // Shifts src:dst right by cl bits, affecting only dst.
  // (src supplies the bits shifted in; it is not modified.)
  void shrd(Register dst, Register src);
   1000 
   1001   // Shifts dst right, duplicating sign bit, by shift_amount bits.
   1002   // Shifting by 1 is handled efficiently.
   1003   void sar(Register dst, Immediate shift_amount) {
   1004     shift(dst, shift_amount, 0x7);
   1005   }
   1006 
   1007   // Shifts dst right, duplicating sign bit, by shift_amount bits.
   1008   // Shifting by 1 is handled efficiently.
   1009   void sarl(Register dst, Immediate shift_amount) {
   1010     shift_32(dst, shift_amount, 0x7);
   1011   }
   1012 
   1013   // Shifts dst right, duplicating sign bit, by cl % 64 bits.
   1014   void sar_cl(Register dst) {
   1015     shift(dst, 0x7);
   1016   }
   1017 
   1018   // Shifts dst right, duplicating sign bit, by cl % 64 bits.
   1019   void sarl_cl(Register dst) {
   1020     shift_32(dst, 0x7);
   1021   }
   1022 
   1023   void shl(Register dst, Immediate shift_amount) {
   1024     shift(dst, shift_amount, 0x4);
   1025   }
   1026 
   1027   void shl_cl(Register dst) {
   1028     shift(dst, 0x4);
   1029   }
   1030 
   1031   void shll_cl(Register dst) {
   1032     shift_32(dst, 0x4);
   1033   }
   1034 
   1035   void shll(Register dst, Immediate shift_amount) {
   1036     shift_32(dst, shift_amount, 0x4);
   1037   }
   1038 
   1039   void shr(Register dst, Immediate shift_amount) {
   1040     shift(dst, shift_amount, 0x5);
   1041   }
   1042 
   1043   void shr_cl(Register dst) {
   1044     shift(dst, 0x5);
   1045   }
   1046 
   1047   void shrl_cl(Register dst) {
   1048     shift_32(dst, 0x5);
   1049   }
   1050 
   1051   void shrl(Register dst, Immediate shift_amount) {
   1052     shift_32(dst, shift_amount, 0x5);
   1053   }
   1054 
  // NOTE(review): presumably the store counterpart of load_rax -- writes RAX
  // to the absolute address dst, recording relocation mode `mode` for the
  // embedded pointer.  Confirm against the definition.
  void store_rax(void* dst, RelocInfo::Mode mode);
  // Overload that takes the destination as an ExternalReference.
  void store_rax(ExternalReference ref);
   1057 
   1058   void subq(Register dst, Register src) {
   1059     arithmetic_op(0x2B, dst, src);
   1060   }
   1061 
   1062   void subq(Register dst, const Operand& src) {
   1063     arithmetic_op(0x2B, dst, src);
   1064   }
   1065 
   1066   void subq(const Operand& dst, Register src) {
   1067     arithmetic_op(0x29, src, dst);
   1068   }
   1069 
   1070   void subq(Register dst, Immediate src) {
   1071     immediate_arithmetic_op(0x5, dst, src);
   1072   }
   1073 
   1074   void subq(const Operand& dst, Immediate src) {
   1075     immediate_arithmetic_op(0x5, dst, src);
   1076   }
   1077 
   1078   void subl(Register dst, Register src) {
   1079     arithmetic_op_32(0x2B, dst, src);
   1080   }
   1081 
   1082   void subl(Register dst, const Operand& src) {
   1083     arithmetic_op_32(0x2B, dst, src);
   1084   }
   1085 
   1086   void subl(const Operand& dst, Immediate src) {
   1087     immediate_arithmetic_op_32(0x5, dst, src);
   1088   }
   1089 
   1090   void subl(Register dst, Immediate src) {
   1091     immediate_arithmetic_op_32(0x5, dst, src);
   1092   }
   1093 
   1094   void subb(Register dst, Immediate src) {
   1095     immediate_arithmetic_op_8(0x5, dst, src);
   1096   }
   1097 
  // Test emitters.  Overloads exist for register/register, register/immediate
  // mask, memory operand/immediate mask and memory operand/register; not
  // every operand size provides all four.
  // NOTE(review): the b/l/q suffixes presumably select 8-, 32- and 64-bit
  // operand sizes; the definitions are outside this header section.
  void testb(Register dst, Register src);
  void testb(Register reg, Immediate mask);
  void testb(const Operand& op, Immediate mask);
  void testb(const Operand& op, Register reg);
  void testl(Register dst, Register src);
  void testl(Register reg, Immediate mask);
  void testl(const Operand& op, Immediate mask);
  void testq(const Operand& op, Register reg);
  void testq(Register dst, Register src);
  void testq(Register dst, Immediate mask);
   1108 
   1109   void xor_(Register dst, Register src) {
   1110     if (dst.code() == src.code()) {
   1111       arithmetic_op_32(0x33, dst, src);
   1112     } else {
   1113       arithmetic_op(0x33, dst, src);
   1114     }
   1115   }
   1116 
   1117   void xorl(Register dst, Register src) {
   1118     arithmetic_op_32(0x33, dst, src);
   1119   }
   1120 
   1121   void xorl(Register dst, const Operand& src) {
   1122     arithmetic_op_32(0x33, dst, src);
   1123   }
   1124 
   1125   void xorl(Register dst, Immediate src) {
   1126     immediate_arithmetic_op_32(0x6, dst, src);
   1127   }
   1128 
   1129   void xorl(const Operand& dst, Immediate src) {
   1130     immediate_arithmetic_op_32(0x6, dst, src);
   1131   }
   1132 
   1133   void xor_(Register dst, const Operand& src) {
   1134     arithmetic_op(0x33, dst, src);
   1135   }
   1136 
   1137   void xor_(const Operand& dst, Register src) {
   1138     arithmetic_op(0x31, src, dst);
   1139   }
   1140 
   1141   void xor_(Register dst, Immediate src) {
   1142     immediate_arithmetic_op(0x6, dst, src);
   1143   }
   1144 
   1145   void xor_(const Operand& dst, Immediate src) {
   1146     immediate_arithmetic_op(0x6, dst, src);
   1147   }
   1148 
  // Bit operations.
  // Both take a memory operand and a register.  NOTE(review): presumably src
  // holds the bit index, with bt testing the bit and bts testing then setting
  // it -- confirm against the definitions.
  void bt(const Operand& dst, Register src);
  void bts(const Operand& dst, Register src);
   1152 
  // Miscellaneous
  // Declarations only; each is named after the instruction it is expected to
  // emit.  The definitions are outside this header section.
  void clc();
  void cld();
  void cpuid();
  void hlt();
  void int3();
  void nop();
  // NOTE(review): n is presumably the number of bytes of padding to emit --
  // confirm against the definition.
  void nop(int n);
  void rdtsc();
  // imm16 is the immediate operand of the return instruction.
  void ret(int imm16);
  // Emits a set-on-condition for cc with reg as the destination.
  void setcc(Condition cc, Register reg);
   1164 
  // Label operations & relative jumps (PPUM Appendix D)
  //
  // A jump takes a branch condition (cc) and a label (L) and generates
  // either a backward branch (label already bound) or a forward branch
  // (label still unbound), linking the latter into the label's fixup chain.
  // Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position
  // Same for a NearLabel, the label type taken by the short-jump overloads
  // of jmp and j.
  void bind(NearLabel* L);
   1182 
  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  // Call to a code object; rmode is the relocation mode for the target.
  void call(Handle<Code> target, RelocInfo::Mode rmode);

  // Calls directly to the given address using a relative offset.
  // Should only ever be used in Code objects for calls within the
  // same Code object. Should not be used when generating new code (use labels),
  // but only when patching existing code.
  void call(Address target);

  // Call near absolute indirect, address in register.
  void call(Register adr);

  // Call near indirect, address read from the memory operand.
  void call(const Operand& operand);
   1199 
  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  void jmp(Label* L);  // unconditional jump to L
  // Jump to a code object; rmode is the relocation mode for the target.
  void jmp(Handle<Code> target, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);

  // Jump near absolute indirect (m64)
  void jmp(const Operand& src);

  // Short jump.  Takes a NearLabel rather than a Label.
  void jmp(NearLabel* L);
   1214 
  // Conditional jumps: branch to the target when condition cc holds.
  void j(Condition cc, Label* L);
  // Conditional jump to a code object; rmode is the relocation mode for the
  // target.
  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);

  // Conditional short jump.  Takes a NearLabel; hint defaults to no_hint.
  void j(Condition cc, NearLabel* L, Hint hint = no_hint);
   1221 
  // Floating-point operations
  // Declarations only; each is named after the floating-point instruction it
  // is expected to emit, and the definitions are outside this header section.
  // NOTE(review): the int parameters (i / index) presumably select a
  // floating-point stack register, and the _s / _d suffixes presumably select
  // the narrower / wider memory operand size -- confirm against the
  // definitions.

  // Loads.
  void fld(int i);

  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  void fld_s(const Operand& adr);
  void fld_d(const Operand& adr);

  // Stores that pop.
  void fstp_s(const Operand& adr);
  void fstp_d(const Operand& adr);
  void fstp(int index);

  // Integer loads.
  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);

  // Integer stores.
  void fist_s(const Operand& adr);

  void fistp_s(const Operand& adr);
  void fistp_d(const Operand& adr);

  void fisttp_s(const Operand& adr);
  void fisttp_d(const Operand& adr);

  // Unary operations.
  void fabs();
  void fchs();

  // Arithmetic with an explicitly given operand.
  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(const Operand& adr);

  // Arithmetic that pops; the operand index defaults to 1.
  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  // Stack management.  fxch defaults to index 1, ffree to index 0.
  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  // Comparisons.
  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  // Comparison and status/control instructions.
  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  // Transcendental operations.
  void fsin();
  void fcos();
  void fyl2x();

  void frndint();

  void sahf();
   1288 
  // SSE2 instructions
  // Declarations only; each is named after the instruction it is expected to
  // emit, and the definitions are outside this header section.

  // Moves between general-purpose and XMM registers.
  void movd(XMMRegister dst, Register src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(Register dst, XMMRegister src);
  // imm8 is the instruction's immediate byte.
  // NOTE(review): presumably it selects which element of src is extracted.
  void extractps(Register dst, XMMRegister src, byte imm8);

  // Moves involving XMM registers and memory.
  void movsd(const Operand& dst, XMMRegister src);
  void movsd(XMMRegister dst, XMMRegister src);
  void movsd(XMMRegister dst, const Operand& src);

  void movdqa(const Operand& dst, XMMRegister src);
  void movdqa(XMMRegister dst, const Operand& src);

  void movss(XMMRegister dst, const Operand& src);
  void movss(const Operand& dst, XMMRegister src);

  // Conversions from floating point into a general-purpose register.
  // NOTE(review): the cvtt* forms presumably truncate and a trailing q
  // presumably selects a 64-bit integer result -- confirm against the
  // definitions.
  void cvttss2si(Register dst, const Operand& src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvttsd2si(Register dst, const Operand& src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, XMMRegister src);

  // Conversions from an integer source into an XMM register.
  // NOTE(review): the l / q infix presumably selects a 32- / 64-bit integer
  // source.
  void cvtlsi2sd(XMMRegister dst, const Operand& src);
  void cvtlsi2sd(XMMRegister dst, Register src);
  void cvtqsi2sd(XMMRegister dst, const Operand& src);
  void cvtqsi2sd(XMMRegister dst, Register src);

  void cvtlsi2ss(XMMRegister dst, Register src);

  // Conversions between the two scalar floating-point formats.
  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src);

  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  // Scalar double arithmetic on XMM registers.
  void addsd(XMMRegister dst, XMMRegister src);
  void subsd(XMMRegister dst, XMMRegister src);
  void mulsd(XMMRegister dst, XMMRegister src);
  void divsd(XMMRegister dst, XMMRegister src);

  // Bitwise operations and square root on XMM registers.
  void andpd(XMMRegister dst, XMMRegister src);
  void orpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, XMMRegister src);
  void sqrtsd(XMMRegister dst, XMMRegister src);

  // Comparisons.
  void ucomisd(XMMRegister dst, XMMRegister src);
  void ucomisd(XMMRegister dst, const Operand& src);

  void movmskpd(Register dst, XMMRegister src);
   1340 
  // Operand-byte emitters for instructions involving XMM registers.
  // The first argument is the reg field, the second argument is the r/m field.
  // Overloads cover XMM/XMM, XMM/memory, XMM/general-purpose and
  // general-purpose/XMM combinations.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, const Operand& adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
   1346 
  // Debugging
  // NOTE(review): presumably prints the code generated so far; the definition
  // is outside this header section.
  void Print();
   1349 
   1350   // Check the code size generated from label to here.
   1351   int SizeOfCodeGeneratedSince(Label* l) { return pc_offset() - l->pos(); }
   1352 
  // Mark address of the ExitJSFrame code.
  // NOTE(review): presumably recorded as relocation info at the current pc --
  // confirm against the definition.
  void RecordJSReturn();

  // Mark address of a debug break slot.
  // NOTE(review): presumably recorded as relocation info at the current pc --
  // confirm against the definition.
  void RecordDebugBreakSlot();
   1358 
  // Record a comment relocation entry that can be used by a disassembler.
  // Use --code-comments to enable.
  // NOTE(review): force (default false) presumably records the comment even
  // when the flag is off -- confirm against the definition.
  void RecordComment(const char* msg, bool force = false);
   1362 
  // Writes a single datum into the code stream: db takes an 8-bit value,
  // dd a 32-bit value.
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
   1367 
   1368   int pc_offset() const { return static_cast<int>(pc_ - buffer_); }
   1369 
   1370   PositionsRecorder* positions_recorder() { return &positions_recorder_; }
   1371 
   1372   // Check if there is less than kGap bytes available in the buffer.
   1373   // If this is the case, we need to grow the buffer before emitting
   1374   // an instruction or relocation information.
   1375   inline bool buffer_overflow() const {
   1376     return pc_ >= reloc_info_writer.pos() - kGap;
   1377   }
   1378 
   1379   // Get the number of bytes available in the buffer.
   1380   inline int available_space() const {
   1381     return static_cast<int>(reloc_info_writer.pos() - pc_);
   1382   }
   1383 
   1384   static bool IsNop(Address addr) { return *addr == 0x90; }
   1385 
  // Bounds on the code buffer size.  The upper bound avoids overflows for
  // displacements etc.
  // NOTE(review): presumably enforced where the buffer is allocated and
  // grown -- those definitions are outside this header section.
  static const int kMaximalBufferSize = 512*MB;
  static const int kMinimalBufferSize = 4*KB;
   1389 
   1390  protected:
   1391   bool emit_debug_code() const { return emit_debug_code_; }
   1392 
   1393  private:
   1394   byte* addr_at(int pos)  { return buffer_ + pos; }
   1395   byte byte_at(int pos)  { return buffer_[pos]; }
   1396   void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
   1397   uint32_t long_at(int pos)  {
   1398     return *reinterpret_cast<uint32_t*>(addr_at(pos));
   1399   }
   1400   void long_at_put(int pos, uint32_t x)  {
   1401     *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
   1402   }
   1403 
  // code emission
  // Enlarges the code buffer.  EnsureSpace calls this when buffer_overflow()
  // reports that fewer than kGap bytes remain.
  void GrowBuffer();
   1406 
   1407   void emit(byte x) { *pc_++ = x; }
  // Wider emitters, defined inline outside this header section: emitl takes
  // a 32-bit value, emitq a 64-bit value plus a relocation mode, emitw a
  // 16-bit value.
  inline void emitl(uint32_t x);
  inline void emitq(uint64_t x, RelocInfo::Mode rmode);
  inline void emitw(uint16_t x);
  // NOTE(review): presumably emits a reference to the code object `target`
  // (tracked via code_targets_) -- confirm against the definition.
  inline void emit_code_target(Handle<Code> target, RelocInfo::Mode rmode);
   1412   void emit(Immediate x) { emitl(x.value_); }
   1413 
  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  // The three overloads differ only in which operand is an XMM register.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is set.
  inline void emit_rex_64(Register reg, const Operand& op);
  inline void emit_rex_64(XMMRegister reg, const Operand& op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R is clear.
  inline void emit_rex_64(const Operand& op);
   1442 
   1443   // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
   1444   void emit_rex_64() { emit(0x48); }
   1445 
  // Emitters for a REX prefix with REX.W clear.  Going by the contrast with
  // the emit_optional_rex_32 family, these emit the prefix even when no REX
  // bit ends up set.

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.
  inline void emit_rex_32(Register reg, const Operand& op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(const Operand& op);
   1462 
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, const Operand& op);

  // As for emit_optional_rex_32(Register, Register), except that
  // both registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // the reg operand is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // the base operand is an XMM register.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, const Operand&), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);

  // Optionally do as emit_rex_32(const Operand&) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(const Operand& op);
   1496 
   1497 
   1498   // Emit the ModR/M byte, and optionally the SIB byte and
   1499   // 1- or 4-byte offset for a memory operand.  Also encodes
   1500   // the second operand of the operation, a register or operation
   1501   // subcode, into the reg field of the ModR/M byte.
   1502   void emit_operand(Register reg, const Operand& adr) {
   1503     emit_operand(reg.low_bits(), adr);
   1504   }
   1505 
  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  // Despite its name, rm is the value for the reg field: the Register
  // overload forwards reg.low_bits() here.
  void emit_operand(int rm, const Operand& adr);
   1510 
   1511   // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
   1512   void emit_modrm(Register reg, Register rm_reg) {
   1513     emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
   1514   }
   1515 
   1516   // Emit a ModR/M byte with an operation subcode in the reg field and
   1517   // a register in the rm_reg field.
   1518   void emit_modrm(int code, Register rm_reg) {
   1519     ASSERT(is_uint3(code));
   1520     emit(0xC0 | code << 3 | rm_reg.low_bits());
   1521   }
   1522 
  // Emit the code-object-relative offset of the label's position.
  // NOTE(review): the width of the emitted offset and the handling of an
  // unbound label are not visible here -- see the inline definition.
  inline void emit_code_relative_offset(Label* label);
   1525 
  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
  // AND, OR, XOR, or CMP.  The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
  // The _16 and _32 variants are the ones used by the 16- and 32-bit
  // wrappers (cmpw, addl, ...); the unsuffixed variant is used by the 64-bit
  // wrappers and by the register/memory forms of cmpb.
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
  void arithmetic_op_32(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg);
  void arithmetic_op(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg);
  // Immediate-operand forms.  subcode identifies the operation; the wrappers
  // pass 0x0 for add, 0x1 for or, 0x4 for and, 0x5 for sub, 0x6 for xor and
  // 0x7 for cmp.
  void immediate_arithmetic_op(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode,
                                 Register dst,
                                 Immediate src);
  void immediate_arithmetic_op_8(byte subcode,
                                 const Operand& dst,
                                 Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_16(byte subcode,
                                  const Operand& dst,
                                  Immediate src);
  // Operate on a 32-bit word in memory or register.
  void immediate_arithmetic_op_32(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_32(byte subcode,
                                  const Operand& dst,
                                  Immediate src);
   1559 
  // Emit machine code for a shift operation.
  // subcode selects the operation; the wrappers pass 0x0 rol, 0x1 ror,
  // 0x2 rcl, 0x3 rcr, 0x4 shl, 0x5 shr and 0x7 sar.
  void shift(Register dst, Immediate shift_amount, int subcode);
  void shift_32(Register dst, Immediate shift_amount, int subcode);
  // Shift dst by the count in cl: cl % 64 bits for the 64-bit form and
  // cl % 32 bits for the 32-bit form.
  void shift(Register dst, int subcode);
  void shift_32(Register dst, int subcode);
   1566 
  // NOTE(review): presumably the shared emitter for the register forms of
  // the floating-point instructions above, with b1 / b2 the two opcode bytes
  // and i the register index -- confirm against the definition.
  void emit_farith(int b1, int b2, int i);
   1568 
  // labels
  // void print(Label* L);
  // NOTE(review): presumably binds L to buffer offset pos and patches the
  // branches already linked to it; bind(Label*) is the public entry point.
  // Confirm against the definition.
  void bind_to(Label* L, int pos);
   1572 
  // Record reloc info for the current pc_.  rmode is the relocation mode;
  // data is an optional payload for the entry and defaults to 0.
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
   1575 
  // Classes granted access to the private emission helpers.  EnsureSpace,
  // for instance, calls the private GrowBuffer().
  friend class CodePatcher;
  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // Code buffer:
  // The buffer into which code and relocation info are generated.
  byte* buffer_;
  int buffer_size_;
  // True if the assembler owns the buffer, false if buffer is external.
  bool own_buffer_;

  // code generation
  byte* pc_;  // the program counter; moves forward
  // Its pos() marks the other end of the free space: buffer_overflow() and
  // available_space() measure the gap between pc_ and that position.
  RelocInfoWriter reloc_info_writer;

  // NOTE(review): presumably the code objects referenced via
  // emit_code_target() -- confirm against its definition.
  List< Handle<Code> > code_targets_;

  // Exposed through positions_recorder().
  PositionsRecorder positions_recorder_;

  // Value reported by emit_debug_code().
  bool emit_debug_code_;

  friend class PositionsRecorder;
   1598 };
   1599 
   1600 
   1601 // Helper class that ensures that there is enough space for generating
   1602 // instructions and relocation information.  The constructor makes
   1603 // sure that there is enough space and (in debug mode) the destructor
   1604 // checks that we did not generate too much.
   1605 class EnsureSpace BASE_EMBEDDED {
   1606  public:
   1607   explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
   1608     if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
   1609 #ifdef DEBUG
   1610     space_before_ = assembler_->available_space();
   1611 #endif
   1612   }
   1613 
   1614 #ifdef DEBUG
   1615   ~EnsureSpace() {
   1616     int bytes_generated = space_before_ - assembler_->available_space();
   1617     ASSERT(bytes_generated < assembler_->kGap);
   1618   }
   1619 #endif
   1620 
   1621  private:
   1622   Assembler* assembler_;
   1623 #ifdef DEBUG
   1624   int space_before_;
   1625 #endif
   1626 };
   1627 
   1628 } }  // namespace v8::internal
   1629 
   1630 #endif  // V8_X64_ASSEMBLER_X64_H_
   1631