Home | History | Annotate | Download | only in x64
      1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
      2 // All Rights Reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 // - Redistributions of source code must retain the above copyright notice,
      9 // this list of conditions and the following disclaimer.
     10 //
     11 // - Redistribution in binary form must reproduce the above copyright
     12 // notice, this list of conditions and the following disclaimer in the
     13 // documentation and/or other materials provided with the distribution.
     14 //
     15 // - Neither the name of Sun Microsystems or the names of contributors may
     16 // be used to endorse or promote products derived from this software without
     17 // specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
     20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
     21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
     23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // The original source code covered by the above license above has been
     32 // modified significantly by Google Inc.
     33 // Copyright 2012 the V8 project authors. All rights reserved.
     34 
     35 // A lightweight X64 Assembler.
     36 
     37 #ifndef V8_X64_ASSEMBLER_X64_H_
     38 #define V8_X64_ASSEMBLER_X64_H_
     39 
     40 #include "serialize.h"
     41 
     42 namespace v8 {
     43 namespace internal {
     44 
     45 // Utility functions
     46 
     47 // Test whether a 64-bit value is in a specific range.
     48 inline bool is_uint32(int64_t x) {
     49   static const uint64_t kMaxUInt32 = V8_UINT64_C(0xffffffff);
     50   return static_cast<uint64_t>(x) <= kMaxUInt32;
     51 }
     52 
     53 inline bool is_int32(int64_t x) {
     54   static const int64_t kMinInt32 = -V8_INT64_C(0x80000000);
     55   return is_uint32(x - kMinInt32);
     56 }
     57 
     58 inline bool uint_is_int32(uint64_t x) {
     59   static const uint64_t kMaxInt32 = V8_UINT64_C(0x7fffffff);
     60   return x <= kMaxInt32;
     61 }
     62 
     63 inline bool is_uint32(uint64_t x) {
     64   static const uint64_t kMaxUInt32 = V8_UINT64_C(0xffffffff);
     65   return x <= kMaxUInt32;
     66 }
     67 
     68 // CPU Registers.
     69 //
     70 // 1) We would prefer to use an enum, but enum values are assignment-
     71 // compatible with int, which has caused code-generation bugs.
     72 //
     73 // 2) We would prefer to use a class instead of a struct but we don't like
     74 // the register initialization to depend on the particular initialization
     75 // order (which appears to be different on OS X, Linux, and Windows for the
     76 // installed versions of C++ we tried). Using a struct permits C-style
     77 // "initialization". Also, the Register objects cannot be const as this
     78 // forces initialization stubs in MSVC, making us dependent on initialization
     79 // order.
     80 //
     81 // 3) By not using an enum, we are possibly preventing the compiler from
     82 // doing certain constant folds, which may significantly reduce the
     83 // code generated for some assembly instructions (because they boil down
     84 // to a few constants). If this is a problem, we could change the code
     85 // such that we use an enum in optimized mode, and the struct in debug
     86 // mode. This way we get the compile-time error checking in debug mode
     87 // and best performance in optimized code.
     88 //
     89 
     90 struct Register {
     91   // The non-allocatable registers are:
     92   //  rsp - stack pointer
     93   //  rbp - frame pointer
     94   //  rsi - context register
     95   //  r10 - fixed scratch register
     96   //  r12 - smi constant register
     97   //  r13 - root register
     98   static const int kNumRegisters = 16;
     99   static const int kNumAllocatableRegisters = 10;
    100 
    101   static int ToAllocationIndex(Register reg) {
    102     return kAllocationIndexByRegisterCode[reg.code()];
    103   }
    104 
    105   static Register FromAllocationIndex(int index) {
    106     ASSERT(index >= 0 && index < kNumAllocatableRegisters);
    107     Register result = { kRegisterCodeByAllocationIndex[index] };
    108     return result;
    109   }
    110 
    111   static const char* AllocationIndexToString(int index) {
    112     ASSERT(index >= 0 && index < kNumAllocatableRegisters);
    113     const char* const names[] = {
    114       "rax",
    115       "rbx",
    116       "rdx",
    117       "rcx",
    118       "rdi",
    119       "r8",
    120       "r9",
    121       "r11",
    122       "r14",
    123       "r15"
    124     };
    125     return names[index];
    126   }
    127 
    128   static Register from_code(int code) {
    129     Register r = { code };
    130     return r;
    131   }
    132   bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
    133   bool is(Register reg) const { return code_ == reg.code_; }
    134   // rax, rbx, rcx and rdx are byte registers, the rest are not.
    135   bool is_byte_register() const { return code_ <= 3; }
    136   int code() const {
    137     ASSERT(is_valid());
    138     return code_;
    139   }
    140   int bit() const {
    141     return 1 << code_;
    142   }
    143 
    144   // Return the high bit of the register code as a 0 or 1.  Used often
    145   // when constructing the REX prefix byte.
    146   int high_bit() const {
    147     return code_ >> 3;
    148   }
    149   // Return the 3 low bits of the register code.  Used when encoding registers
    150   // in modR/M, SIB, and opcode bytes.
    151   int low_bits() const {
    152     return code_ & 0x7;
    153   }
    154 
    155   // Unfortunately we can't make this private in a struct when initializing
    156   // by assignment.
    157   int code_;
    158 
    159  private:
    160   static const int kRegisterCodeByAllocationIndex[kNumAllocatableRegisters];
    161   static const int kAllocationIndexByRegisterCode[kNumRegisters];
    162 };
    163 
// Numeric codes of the sixteen general-purpose registers, one constant per
// register. Register::high_bit() and Register::low_bits() split these codes
// into the parts used for the REX prefix and for the modR/M, SIB and opcode
// bytes.
const int kRegister_rax_Code = 0;
const int kRegister_rcx_Code = 1;
const int kRegister_rdx_Code = 2;
const int kRegister_rbx_Code = 3;
const int kRegister_rsp_Code = 4;
const int kRegister_rbp_Code = 5;
const int kRegister_rsi_Code = 6;
const int kRegister_rdi_Code = 7;
const int kRegister_r8_Code = 8;
const int kRegister_r9_Code = 9;
const int kRegister_r10_Code = 10;
const int kRegister_r11_Code = 11;
const int kRegister_r12_Code = 12;
const int kRegister_r13_Code = 13;
const int kRegister_r14_Code = 14;
const int kRegister_r15_Code = 15;
// Code of the "no register" marker; it is negative, so Register::is_valid()
// is false for it.
const int kRegister_no_reg_Code = -1;

// The Register objects themselves, brace-initialized from the codes above.
// NOTE(review): the comment ahead of struct Register says Register objects
// cannot be const, yet these are declared const - confirm which is current.
const Register rax = { kRegister_rax_Code };
const Register rcx = { kRegister_rcx_Code };
const Register rdx = { kRegister_rdx_Code };
const Register rbx = { kRegister_rbx_Code };
const Register rsp = { kRegister_rsp_Code };
const Register rbp = { kRegister_rbp_Code };
const Register rsi = { kRegister_rsi_Code };
const Register rdi = { kRegister_rdi_Code };
const Register r8 = { kRegister_r8_Code };
const Register r9 = { kRegister_r9_Code };
const Register r10 = { kRegister_r10_Code };
const Register r11 = { kRegister_r11_Code };
const Register r12 = { kRegister_r12_Code };
const Register r13 = { kRegister_r13_Code };
const Register r14 = { kRegister_r14_Code };
const Register r15 = { kRegister_r15_Code };
// Marker for "no register"; not a valid register.
const Register no_reg = { kRegister_no_reg_Code };
    199 
    200 
    201 struct XMMRegister {
    202   static const int kNumRegisters = 16;
    203   static const int kNumAllocatableRegisters = 15;
    204 
    205   static int ToAllocationIndex(XMMRegister reg) {
    206     ASSERT(reg.code() != 0);
    207     return reg.code() - 1;
    208   }
    209 
    210   static XMMRegister FromAllocationIndex(int index) {
    211     ASSERT(0 <= index && index < kNumAllocatableRegisters);
    212     XMMRegister result = { index + 1 };
    213     return result;
    214   }
    215 
    216   static const char* AllocationIndexToString(int index) {
    217     ASSERT(index >= 0 && index < kNumAllocatableRegisters);
    218     const char* const names[] = {
    219       "xmm1",
    220       "xmm2",
    221       "xmm3",
    222       "xmm4",
    223       "xmm5",
    224       "xmm6",
    225       "xmm7",
    226       "xmm8",
    227       "xmm9",
    228       "xmm10",
    229       "xmm11",
    230       "xmm12",
    231       "xmm13",
    232       "xmm14",
    233       "xmm15"
    234     };
    235     return names[index];
    236   }
    237 
    238   static XMMRegister from_code(int code) {
    239     ASSERT(code >= 0);
    240     ASSERT(code < kNumRegisters);
    241     XMMRegister r = { code };
    242     return r;
    243   }
    244   bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
    245   bool is(XMMRegister reg) const { return code_ == reg.code_; }
    246   int code() const {
    247     ASSERT(is_valid());
    248     return code_;
    249   }
    250 
    251   // Return the high bit of the register code as a 0 or 1.  Used often
    252   // when constructing the REX prefix byte.
    253   int high_bit() const {
    254     return code_ >> 3;
    255   }
    256   // Return the 3 low bits of the register code.  Used when encoding registers
    257   // in modR/M, SIB, and opcode bytes.
    258   int low_bits() const {
    259     return code_ & 0x7;
    260   }
    261 
    262   int code_;
    263 };
    264 
// The sixteen XMM registers, brace-initialized with their register codes.
// xmm0 (code 0) has no allocation index: XMMRegister::ToAllocationIndex
// asserts code != 0 and XMMRegister::FromAllocationIndex starts at code 1.
const XMMRegister xmm0 = { 0 };
const XMMRegister xmm1 = { 1 };
const XMMRegister xmm2 = { 2 };
const XMMRegister xmm3 = { 3 };
const XMMRegister xmm4 = { 4 };
const XMMRegister xmm5 = { 5 };
const XMMRegister xmm6 = { 6 };
const XMMRegister xmm7 = { 7 };
const XMMRegister xmm8 = { 8 };
const XMMRegister xmm9 = { 9 };
const XMMRegister xmm10 = { 10 };
const XMMRegister xmm11 = { 11 };
const XMMRegister xmm12 = { 12 };
const XMMRegister xmm13 = { 13 };
const XMMRegister xmm14 = { 14 };
const XMMRegister xmm15 = { 15 };


// DoubleRegister is another name for XMMRegister.
typedef XMMRegister DoubleRegister;
    284 
    285 
// Condition codes for conditional instructions. The values 0..15 come in
// even/odd pairs of opposites (overflow/no_overflow, below/above_equal, ...),
// which is what allows NegateCondition to negate by flipping the lowest bit.
enum Condition {
  // any value < 0 is considered no_condition
  no_condition  = -1,

  overflow      =  0,
  no_overflow   =  1,
  below         =  2,
  above_equal   =  3,
  equal         =  4,
  not_equal     =  5,
  below_equal   =  6,
  above         =  7,
  negative      =  8,
  positive      =  9,
  parity_even   = 10,
  parity_odd    = 11,
  less          = 12,
  greater_equal = 13,
  less_equal    = 14,
  greater       = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always        = 16,
  never         = 17,
  // aliases: alternative names for conditions defined above.
  carry         = below,
  not_carry     = above_equal,
  zero          = equal,
  not_zero      = not_equal,
  sign          = negative,
  not_sign      = positive,
  // Highest-valued condition that is not one of the fake conditions.
  last_condition = greater
};
    320 
    321 
    322 // Returns the equivalent of !cc.
    323 // Negation of the default no_condition (-1) results in a non-default
    324 // no_condition value (-2). As long as tests for no_condition check
    325 // for condition < 0, this will work as expected.
    326 inline Condition NegateCondition(Condition cc) {
    327   return static_cast<Condition>(cc ^ 1);
    328 }
    329 
    330 
    331 // Corresponds to transposing the operands of a comparison.
    332 inline Condition ReverseCondition(Condition cc) {
    333   switch (cc) {
    334     case below:
    335       return above;
    336     case above:
    337       return below;
    338     case above_equal:
    339       return below_equal;
    340     case below_equal:
    341       return above_equal;
    342     case less:
    343       return greater;
    344     case greater:
    345       return less;
    346     case greater_equal:
    347       return less_equal;
    348     case less_equal:
    349       return greater_equal;
    350     default:
    351       return cc;
    352   };
    353 }
    354 
    355 
    356 // -----------------------------------------------------------------------------
    357 // Machine instruction Immediates
    358 
// Wrapper around a 32-bit immediate value for an instruction. The value can
// only be read back by Assembler, which is a friend.
class Immediate BASE_EMBEDDED {
 public:
  // explicit, so a plain integer is never silently converted to an
  // Immediate.
  explicit Immediate(int32_t value) : value_(value) {}

 private:
  // The wrapped immediate value.
  int32_t value_;

  friend class Assembler;
};
    368 
    369 
    370 // -----------------------------------------------------------------------------
    371 // Machine instruction Operands
    372 
// Multiplier applied to the index register of a memory operand. Each
// enumerator's value is the base-2 logarithm of its multiplier
// (times_1 = 0, times_2 = 1, times_4 = 2, times_8 = 3).
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  // Named aliases: an int is scaled by 4 and a pointer by 8.
  times_int_size = times_4,
  times_pointer_size = times_8
};
    381 
    382 
// A memory operand of an instruction: some combination of base register,
// scaled index register and 32-bit displacement. The encoded bytes are kept
// in buf_ and are only accessible to Assembler, which is a friend.
class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  // True when any bit of rex_ is set.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

 private:
  // REX prefix bits this operand needs; zero when it needs none.
  byte rex_;
  // Encoded operand bytes. NOTE(review): six bytes presumably covers one
  // ModR/M byte, one SIB byte and a four-byte displacement - confirm.
  byte buf_[6];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);

  friend class Assembler;
};
    436 
    437 
    438 // CpuFeatures keeps track of which features are supported by the target CPU.
    439 // Supported features must be enabled by a Scope before use.
    440 // Example:
    441 //   if (CpuFeatures::IsSupported(SSE3)) {
    442 //     CpuFeatures::Scope fscope(SSE3);
    443 //     // Generate SSE3 floating point code.
    444 //   } else {
    445 //     // Generate standard x87 or SSE2 floating point code.
    446 //   }
    447 class CpuFeatures : public AllStatic {
    448  public:
    449   // Detect features of the target CPU. Set safe defaults if the serializer
    450   // is enabled (snapshots must be portable).
    451   static void Probe();
    452 
    453   // Check whether a feature is supported by the target CPU.
    454   static bool IsSupported(CpuFeature f) {
    455     ASSERT(initialized_);
    456     if (f == SSE2 && !FLAG_enable_sse2) return false;
    457     if (f == SSE3 && !FLAG_enable_sse3) return false;
    458     if (f == CMOV && !FLAG_enable_cmov) return false;
    459     if (f == RDTSC && !FLAG_enable_rdtsc) return false;
    460     if (f == SAHF && !FLAG_enable_sahf) return false;
    461     return (supported_ & (V8_UINT64_C(1) << f)) != 0;
    462   }
    463 
    464 #ifdef DEBUG
    465   // Check whether a feature is currently enabled.
    466   static bool IsEnabled(CpuFeature f) {
    467     ASSERT(initialized_);
    468     Isolate* isolate = Isolate::UncheckedCurrent();
    469     if (isolate == NULL) {
    470       // When no isolate is available, work as if we're running in
    471       // release mode.
    472       return IsSupported(f);
    473     }
    474     uint64_t enabled = isolate->enabled_cpu_features();
    475     return (enabled & (V8_UINT64_C(1) << f)) != 0;
    476   }
    477 #endif
    478 
    479   // Enable a specified feature within a scope.
    480   class Scope BASE_EMBEDDED {
    481 #ifdef DEBUG
    482 
    483    public:
    484     explicit Scope(CpuFeature f) {
    485       uint64_t mask = V8_UINT64_C(1) << f;
    486       ASSERT(CpuFeatures::IsSupported(f));
    487       ASSERT(!Serializer::enabled() ||
    488              (CpuFeatures::found_by_runtime_probing_ & mask) == 0);
    489       isolate_ = Isolate::UncheckedCurrent();
    490       old_enabled_ = 0;
    491       if (isolate_ != NULL) {
    492         old_enabled_ = isolate_->enabled_cpu_features();
    493         isolate_->set_enabled_cpu_features(old_enabled_ | mask);
    494       }
    495     }
    496     ~Scope() {
    497       ASSERT_EQ(Isolate::UncheckedCurrent(), isolate_);
    498       if (isolate_ != NULL) {
    499         isolate_->set_enabled_cpu_features(old_enabled_);
    500       }
    501     }
    502 
    503    private:
    504     Isolate* isolate_;
    505     uint64_t old_enabled_;
    506 #else
    507 
    508    public:
    509     explicit Scope(CpuFeature f) {}
    510 #endif
    511   };
    512 
    513  private:
    514   // Safe defaults include SSE2 and CMOV for X64. It is always available, if
    515   // anyone checks, but they shouldn't need to check.
    516   // The required user mode extensions in X64 are (from AMD64 ABI Table A.1):
    517   //   fpu, tsc, cx8, cmov, mmx, sse, sse2, fxsr, syscall
    518   static const uint64_t kDefaultCpuFeatures = (1 << SSE2 | 1 << CMOV);
    519 
    520 #ifdef DEBUG
    521   static bool initialized_;
    522 #endif
    523   static uint64_t supported_;
    524   static uint64_t found_by_runtime_probing_;
    525 
    526   DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
    527 };
    528 
    529 
    530 class Assembler : public AssemblerBase {
    531  private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  // 15 + 16 = 31 bytes are needed in the worst case, which fits in 32.
  static const int kGap = 32;
    542 
    543  public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is NULL, the assembler allocates and grows its own
  // buffer, and buffer_size determines the initial buffer size. The buffer is
  // owned by the assembler and deallocated upon destruction of the assembler.
  //
  // If the provided buffer is not NULL, the assembler uses the provided buffer
  // for code generation and assumes its size to be buffer_size. If the buffer
  // is too small, a fatal error occurs. No deallocation of the buffer is done
  // upon destruction of the assembler.
  Assembler(Isolate* isolate, void* buffer, int buffer_size);
  // Deallocates the buffer only when the assembler owns it (see above).
  ~Assembler();

  // Overrides the default provided by FLAG_debug_code.
  // Stores the value in emit_debug_code_.
  void set_emit_debug_code(bool value) { emit_debug_code_ = value; }

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc. GetCode() is idempotent; it returns the same result if no other
  // Assembler functions are invoked in between GetCode() calls.
  void GetCode(CodeDesc* desc);
    567 
  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
  static inline Address target_address_at(Address pc);
  static inline void set_target_address_at(Address pc, Address target);

  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  // Forwards both arguments unchanged to set_target_address_at.
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Address target) {
    set_target_address_at(instruction_payload, target);
  }
    584 
    585   // This sets the branch destination (which is a load instruction on x64).
    586   // This is for calls and branches to runtime code.
    587   inline static void set_external_target_at(Address instruction_payload,
    588                                             Address target) {
    589     *reinterpret_cast<Address*>(instruction_payload) = target;
    590   }
    591 
  // NOTE(review): presumably returns the handle of the code target referenced
  // at pc - only the declaration is visible here, confirm at the definition.
  inline Handle<Object> code_target_object_handle_at(Address pc);
  // Number of bytes taken up by the branch target in the code.
  static const int kSpecialTargetSize = 4;  // Use 32-bit displacement.
  // Distance between the address of the code target in the call instruction
  // and the return address pushed on the stack.
  static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
  // Distance between the start of the JS return sequence and where the
  // 32-bit displacement of a near call would be, relative to the pushed
  // return address.  TODO: Use return sequence length instead.
  // Should equal Debug::kX64JSReturnSequenceLength - kCallTargetAddressOffset;
  // The literals match kJSReturnSequenceLength (13) and
  // kCallTargetAddressOffset (4) as defined in this class.
  static const int kPatchReturnSequenceAddressOffset = 13 - 4;
  // Distance between start of patched debug break slot and where the
  // 32-bit displacement of a near call would be, relative to the pushed
  // return address.  TODO: Use return sequence length instead.
  // Should equal Debug::kX64JSReturnSequenceLength - kCallTargetAddressOffset;
  // NOTE(review): the "Should equal" line is identical to the one on the
  // return sequence offset above; for a debug break slot the intended length
  // may be kDebugBreakSlotLength (also 13) - confirm.
  static const int kPatchDebugBreakSlotAddressOffset = 13 - 4;
  // TODO(X64): Rename this, removing the "Real", after changing the above.
  static const int kRealPatchReturnSequenceAddressOffset = 2;

  // Some x64 JS code is padded with int3 to make it large
  // enough to hold an instruction when the debugger patches it.
  // All lengths below are in bytes.
  static const int kJumpInstructionLength = 13;
  static const int kCallInstructionLength = 13;
  static const int kJSReturnSequenceLength = 13;
  // NOTE(review): presumably one opcode byte plus a 32-bit displacement.
  static const int kShortCallInstructionLength = 5;

  // The debug break slot must be able to contain a call instruction.
  static const int kDebugBreakSlotLength = kCallInstructionLength;

  // One byte opcode for test eax,0xXXXXXXXX.
  static const byte kTestEaxByte = 0xA9;
  // One byte opcode for test al, 0xXX.
  static const byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static const byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  // The condition code is OR-ed into the low bits of the prefix, giving
  // 0x73 for not_carry (3) and 0x72 for carry (2).
  static const byte kJccShortPrefix = 0x70;
  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static const byte kJcShortOpcode = kJccShortPrefix | carry;
    632 
    633 
    634 
    635   // ---------------------------------------------------------------------------
    636   // Code generation
    637   //
    638   // Function names correspond one-to-one to x64 instruction mnemonics.
    639   // Unless specified otherwise, instructions operate on 64-bit operands.
    640   //
    641   // If we need versions of an assembly instruction that operate on different
    642   // width arguments, we add a single-letter suffix specifying the width.
    643   // This is done for the following instructions: mov, cmp, inc, dec,
    644   // add, sub, and test.
    645   // There are no versions of these instructions without the suffix.
    646   // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
    647   // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
    648   // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
    649   // - Instructions on 64-bit (quadword) operands/registers use 'q'.
    650   //
    651   // Some mnemonics, such as "and", are the same as C++ keywords.
    652   // Naming conflicts with C++ keywords are resolved by adding a trailing '_'.
    653 
  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m, where m must be a power of 2.
  void Align(int m);
  // NOTE(review): presumably emits `bytes` bytes of nop padding, one byte
  // when called without an argument - confirm against the definition.
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal for a jump target for the platform.
  void CodeTargetAlign();
    661 
  // Stack
  // Named after the x64 mnemonics pushfq/popfq, which push and pop the flags
  // register.
  void pushfq();
  void popfq();

  // Pushes an immediate value. Per the comment on push_imm32 below, this
  // form may be encoded with an 8-bit immediate.
  void push(Immediate value);
  // Push a 32 bit integer, and guarantee that it is actually pushed as a
  // 32 bit value, the normal push will optimize the 8 bit case.
  void push_imm32(int32_t imm32);
  // Push a register or the contents of a memory operand.
  void push(Register src);
  void push(const Operand& src);

  // Pop into a register or into a memory operand.
  void pop(Register dst);
  void pop(const Operand& dst);

  // Named after the x64 mnemonics enter/leave, which set up and tear down a
  // stack frame.
  void enter(Immediate size);
  void leave();
    678 
  // Moves
  // Byte (8-bit) moves.
  void movb(Register dst, const Operand& src);
  void movb(Register dst, Immediate imm);
  void movb(const Operand& dst, Register src);

  // Move the low 16 bits of a 64-bit register value to a 16-bit
  // memory location.
  void movw(const Operand& dst, Register src);

  // Doubleword (32-bit) moves.
  void movl(Register dst, Register src);
  void movl(Register dst, const Operand& src);
  void movl(const Operand& dst, Register src);
  void movl(const Operand& dst, Immediate imm);
  // Load a 32-bit immediate value, zero-extended to 64 bits.
  void movl(Register dst, Immediate imm32);

  // Move 64 bit register value to 64-bit memory location.
  void movq(const Operand& dst, Register src);
  // Move 64 bit memory location to 64-bit register value.
  void movq(Register dst, const Operand& src);
  void movq(Register dst, Register src);
  // Sign extends immediate 32-bit value to 64 bits.
  void movq(Register dst, Immediate x);
  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  // Note that this is a 32-bit movl overload, declared among the movq ones.
  void movl(const Operand& dst, Label* src);

  // Move sign extended immediate to memory location.
  void movq(const Operand& dst, Immediate value);
  // Instructions to load a 64-bit immediate into a register.
  // All 64-bit immediates must have a relocation mode.
  void movq(Register dst, void* ptr, RelocInfo::Mode rmode);
  void movq(Register dst, int64_t value, RelocInfo::Mode rmode);
  void movq(Register dst, const char* s, RelocInfo::Mode rmode);
  // Moves the address of the external reference into the register.
  void movq(Register dst, ExternalReference ext);
  void movq(Register dst, Handle<Object> handle, RelocInfo::Mode rmode);

  // Moves with sign extension (movsx*) or zero extension (movzx*). The two
  // letters after the x give the source and destination widths: b = byte,
  // w = word, l = doubleword, q = quadword.
  void movsxbq(Register dst, const Operand& src);
  void movsxwq(Register dst, const Operand& src);
  void movsxlq(Register dst, Register src);
  void movsxlq(Register dst, const Operand& src);
  void movzxbq(Register dst, const Operand& src);
  void movzxbl(Register dst, const Operand& src);
  void movzxwq(Register dst, const Operand& src);
  void movzxwl(Register dst, const Operand& src);
    725 
  // Repeated moves.
  // One function per element width: b = byte, w = word, l = doubleword,
  // q = quadword.

  void repmovsb();
  void repmovsw();
  void repmovsl();
  void repmovsq();

  // Instruction to load from an immediate 64-bit pointer into RAX.
  void load_rax(void* ptr, RelocInfo::Mode rmode);
  void load_rax(ExternalReference ext);

  // Conditional moves.
  // cmovq is the 64-bit form and cmovl the 32-bit form; cc selects the
  // condition.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, const Operand& src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, const Operand& src);

  // Exchange two registers
  void xchg(Register dst, Register src);
    745 
    746   // Arithmetics
    747   void addl(Register dst, Register src) {
    748     arithmetic_op_32(0x03, dst, src);
    749   }
    750 
    751   void addl(Register dst, Immediate src) {
    752     immediate_arithmetic_op_32(0x0, dst, src);
    753   }
    754 
    755   void addl(Register dst, const Operand& src) {
    756     arithmetic_op_32(0x03, dst, src);
    757   }
    758 
    759   void addl(const Operand& dst, Immediate src) {  // 32-bit add: [dst] += imm.
    760     immediate_arithmetic_op_32(0x0, dst, src);  // Subcode /0 = ADD.
    761   }
    762 
    763   void addl(const Operand& dst, Register src) {  // 32-bit add: [dst] += src.
    764     arithmetic_op_32(0x01, src, dst);  // 0x01 = ADD r/m32, r32; src is the reg field.
    765   }
    766 
    767   void addq(Register dst, Register src) {  // 64-bit add: dst += src.
    768     arithmetic_op(0x03, dst, src);  // 0x03 = ADD r, r/m.
    769   }
    770 
    771   void addq(Register dst, const Operand& src) {  // 64-bit add: dst += [src].
    772     arithmetic_op(0x03, dst, src);  // 0x03 = ADD r, r/m.
    773   }
    774 
    775   void addq(const Operand& dst, Register src) {  // 64-bit add: [dst] += src.
    776     arithmetic_op(0x01, src, dst);  // 0x01 = ADD r/m, r; src is the reg field.
    777   }
    778 
    779   void addq(Register dst, Immediate src) {  // 64-bit add: dst += imm.
    780     immediate_arithmetic_op(0x0, dst, src);  // Subcode /0 = ADD.
    781   }
    782 
    783   void addq(const Operand& dst, Immediate src) {  // 64-bit add: [dst] += imm.
    784     immediate_arithmetic_op(0x0, dst, src);  // Subcode /0 = ADD.
    785   }
    786 
    787   void sbbl(Register dst, Register src) {  // 32-bit subtract with borrow: dst -= src + CF.
    788     arithmetic_op_32(0x1b, dst, src);  // 0x1B = SBB r32, r/m32.
    789   }
    790 
    791   void sbbq(Register dst, Register src) {  // 64-bit subtract with borrow: dst -= src + CF.
    792     arithmetic_op(0x1b, dst, src);  // 0x1B = SBB r, r/m.
    793   }
    794 
    795   void cmpb(Register dst, Immediate src) {  // 8-bit compare: flags from dst - imm.
    796     immediate_arithmetic_op_8(0x7, dst, src);  // Subcode /7 = CMP.
    797   }
    798 
    799   void cmpb_al(Immediate src);
    800 
    801   void cmpb(Register dst, Register src) {  // 8-bit compare: flags from dst - src.
    802     arithmetic_op(0x3A, dst, src);  // 0x3A = CMP r8, r/m8, sent through the generic emitter.
    803   }
    804 
    805   void cmpb(Register dst, const Operand& src) {  // 8-bit compare: flags from dst - [src].
    806     arithmetic_op(0x3A, dst, src);  // 0x3A = CMP r8, r/m8, sent through the generic emitter.
    807   }
    808 
    809   void cmpb(const Operand& dst, Register src) {  // 8-bit compare: flags from [dst] - src.
    810     arithmetic_op(0x38, src, dst);  // 0x38 = CMP r/m8, r8; src is the reg field.
    811   }
    812 
    813   void cmpb(const Operand& dst, Immediate src) {  // 8-bit compare: flags from [dst] - imm.
    814     immediate_arithmetic_op_8(0x7, dst, src);  // Subcode /7 = CMP.
    815   }
    816 
    817   void cmpw(const Operand& dst, Immediate src) {  // 16-bit compare: flags from [dst] - imm.
    818     immediate_arithmetic_op_16(0x7, dst, src);  // Subcode /7 = CMP.
    819   }
    820 
    821   void cmpw(Register dst, Immediate src) {  // 16-bit compare: flags from dst - imm.
    822     immediate_arithmetic_op_16(0x7, dst, src);  // Subcode /7 = CMP.
    823   }
    824 
    825   void cmpw(Register dst, const Operand& src) {  // 16-bit compare: flags from dst - [src].
    826     arithmetic_op_16(0x3B, dst, src);  // 0x3B = CMP r, r/m via the 16-bit emitter.
    827   }
    828 
    829   void cmpw(Register dst, Register src) {  // 16-bit compare: flags from dst - src.
    830     arithmetic_op_16(0x3B, dst, src);  // 0x3B = CMP r, r/m via the 16-bit emitter.
    831   }
    832 
    833   void cmpw(const Operand& dst, Register src) {  // 16-bit compare: flags from [dst] - src.
    834     arithmetic_op_16(0x39, src, dst);  // 0x39 = CMP r/m, r; src is the reg field.
    835   }
    836 
    837   void cmpl(Register dst, Register src) {  // 32-bit compare: flags from dst - src.
    838     arithmetic_op_32(0x3B, dst, src);  // 0x3B = CMP r32, r/m32.
    839   }
    840 
    841   void cmpl(Register dst, const Operand& src) {  // 32-bit compare: flags from dst - [src].
    842     arithmetic_op_32(0x3B, dst, src);  // 0x3B = CMP r32, r/m32.
    843   }
    844 
    845   void cmpl(const Operand& dst, Register src) {  // 32-bit compare: flags from [dst] - src.
    846     arithmetic_op_32(0x39, src, dst);  // 0x39 = CMP r/m32, r32; src is the reg field.
    847   }
    848 
    849   void cmpl(Register dst, Immediate src) {  // 32-bit compare: flags from dst - imm.
    850     immediate_arithmetic_op_32(0x7, dst, src);  // Subcode /7 = CMP.
    851   }
    852 
    853   void cmpl(const Operand& dst, Immediate src) {  // 32-bit compare: flags from [dst] - imm.
    854     immediate_arithmetic_op_32(0x7, dst, src);  // Subcode /7 = CMP.
    855   }
    856 
    857   void cmpq(Register dst, Register src) {  // 64-bit compare: flags from dst - src.
    858     arithmetic_op(0x3B, dst, src);  // 0x3B = CMP r, r/m.
    859   }
    860 
    861   void cmpq(Register dst, const Operand& src) {  // 64-bit compare: flags from dst - [src].
    862     arithmetic_op(0x3B, dst, src);  // 0x3B = CMP r, r/m.
    863   }
    864 
    865   void cmpq(const Operand& dst, Register src) {  // 64-bit compare: flags from [dst] - src.
    866     arithmetic_op(0x39, src, dst);  // 0x39 = CMP r/m, r; src is the reg field.
    867   }
    868 
    869   void cmpq(Register dst, Immediate src) {  // 64-bit compare: flags from dst - imm.
    870     immediate_arithmetic_op(0x7, dst, src);  // Subcode /7 = CMP.
    871   }
    872 
    873   void cmpq(const Operand& dst, Immediate src) {  // 64-bit compare: flags from [dst] - imm.
    874     immediate_arithmetic_op(0x7, dst, src);  // Subcode /7 = CMP.
    875   }
    876 
    877   void and_(Register dst, Register src) {  // 64-bit AND: dst &= src.
    878     arithmetic_op(0x23, dst, src);  // 0x23 = AND r, r/m.
    879   }
    880 
    881   void and_(Register dst, const Operand& src) {  // 64-bit AND: dst &= [src].
    882     arithmetic_op(0x23, dst, src);  // 0x23 = AND r, r/m.
    883   }
    884 
    885   void and_(const Operand& dst, Register src) {  // 64-bit AND: [dst] &= src.
    886     arithmetic_op(0x21, src, dst);  // 0x21 = AND r/m, r; src is the reg field.
    887   }
    888 
    889   void and_(Register dst, Immediate src) {  // 64-bit AND: dst &= imm.
    890     immediate_arithmetic_op(0x4, dst, src);  // Subcode /4 = AND.
    891   }
    892 
    893   void and_(const Operand& dst, Immediate src) {  // 64-bit AND: [dst] &= imm.
    894     immediate_arithmetic_op(0x4, dst, src);  // Subcode /4 = AND.
    895   }
    896 
    897   void andl(Register dst, Immediate src) {  // 32-bit AND: dst &= imm.
    898     immediate_arithmetic_op_32(0x4, dst, src);  // Subcode /4 = AND.
    899   }
    900 
    901   void andl(Register dst, Register src) {  // 32-bit AND: dst &= src.
    902     arithmetic_op_32(0x23, dst, src);  // 0x23 = AND r32, r/m32.
    903   }
    904 
    905   void andl(Register dst, const Operand& src) {  // 32-bit AND: dst &= [src].
    906     arithmetic_op_32(0x23, dst, src);  // 0x23 = AND r32, r/m32.
    907   }
    908 
    909   void andb(Register dst, Immediate src) {  // 8-bit AND: dst &= imm.
    910     immediate_arithmetic_op_8(0x4, dst, src);  // Subcode /4 = AND.
    911   }
    912 
    913   void decq(Register dst);
    914   void decq(const Operand& dst);
    915   void decl(Register dst);
    916   void decl(const Operand& dst);
    917   void decb(Register dst);
    918   void decb(const Operand& dst);
    919 
    920   // Sign-extends rax into rdx:rax.
    921   void cqo();
    922   // Sign-extends eax into edx:eax.
    923   void cdq();
    924 
    925   // Divide rdx:rax by src.  Quotient in rax, remainder in rdx.
    926   void idivq(Register src);
    927   // Divide edx:eax by lower 32 bits of src.  Quotient in eax, rem. in edx.
    928   void idivl(Register src);
    929 
    930   // Signed multiply instructions.
    931   void imul(Register src);                               // rdx:rax = rax * src.
    932   void imul(Register dst, Register src);                 // dst = dst * src.
    933   void imul(Register dst, const Operand& src);           // dst = dst * src.
    934   void imul(Register dst, Register src, Immediate imm);  // dst = src * imm.
    935   // Signed 32-bit multiply instructions.
    936   void imull(Register dst, Register src);                 // dst = dst * src.
    937   void imull(Register dst, const Operand& src);           // dst = dst * src.
    938   void imull(Register dst, Register src, Immediate imm);  // dst = src * imm.
    939 
    940   void incq(Register dst);
    941   void incq(const Operand& dst);
    942   void incl(Register dst);
    943   void incl(const Operand& dst);
    944 
    945   void lea(Register dst, const Operand& src);
    946   void leal(Register dst, const Operand& src);
    947 
    948   // Multiply rax by src, put the result in rdx:rax.
    949   void mul(Register src);
    950 
    951   void neg(Register dst);
    952   void neg(const Operand& dst);
    953   void negl(Register dst);
    954 
    955   void not_(Register dst);
    956   void not_(const Operand& dst);
    957   void notl(Register dst);
    958 
    959   void or_(Register dst, Register src) {  // 64-bit OR: dst |= src.
    960     arithmetic_op(0x0B, dst, src);  // 0x0B = OR r, r/m.
    961   }
    962 
    963   void orl(Register dst, Register src) {  // 32-bit OR: dst |= src.
    964     arithmetic_op_32(0x0B, dst, src);  // 0x0B = OR r32, r/m32.
    965   }
    966 
    967   void or_(Register dst, const Operand& src) {  // 64-bit OR: dst |= [src].
    968     arithmetic_op(0x0B, dst, src);  // 0x0B = OR r, r/m.
    969   }
    970 
    971   void orl(Register dst, const Operand& src) {  // 32-bit OR: dst |= [src].
    972     arithmetic_op_32(0x0B, dst, src);  // 0x0B = OR r32, r/m32.
    973   }
    974 
    975   void or_(const Operand& dst, Register src) {  // 64-bit OR: [dst] |= src.
    976     arithmetic_op(0x09, src, dst);  // 0x09 = OR r/m, r; src is the reg field.
    977   }
    978 
    979   void or_(Register dst, Immediate src) {  // 64-bit OR: dst |= imm.
    980     immediate_arithmetic_op(0x1, dst, src);  // Subcode /1 = OR.
    981   }
    982 
    983   void orl(Register dst, Immediate src) {  // 32-bit OR: dst |= imm.
    984     immediate_arithmetic_op_32(0x1, dst, src);  // Subcode /1 = OR.
    985   }
    986 
    987   void or_(const Operand& dst, Immediate src) {  // 64-bit OR: [dst] |= imm.
    988     immediate_arithmetic_op(0x1, dst, src);  // Subcode /1 = OR.
    989   }
    990 
    991   void orl(const Operand& dst, Immediate src) {  // 32-bit OR: [dst] |= imm.
    992     immediate_arithmetic_op_32(0x1, dst, src);  // Subcode /1 = OR.
    993   }
    994 
    995 
    996   void rcl(Register dst, Immediate imm8) {  // Rotate dst left through the carry flag by imm8.
    997     shift(dst, imm8, 0x2);  // Subcode /2 = RCL.
    998   }
    999 
   1000   void rol(Register dst, Immediate imm8) {  // Rotate dst left by imm8.
   1001     shift(dst, imm8, 0x0);  // Subcode /0 = ROL.
   1002   }
   1003 
   1004   void rcr(Register dst, Immediate imm8) {  // Rotate dst right through the carry flag by imm8.
   1005     shift(dst, imm8, 0x3);  // Subcode /3 = RCR.
   1006   }
   1007 
   1008   void ror(Register dst, Immediate imm8) {  // Rotate dst right by imm8.
   1009     shift(dst, imm8, 0x1);  // Subcode /1 = ROR.
   1010   }
   1011 
   1012   // Shifts dst:src left by cl bits, affecting only dst.
   1013   void shld(Register dst, Register src);
   1014 
   1015   // Shifts src:dst right by cl bits, affecting only dst.
   1016   void shrd(Register dst, Register src);
   1017 
   1018   // Shifts dst right, duplicating sign bit, by shift_amount bits.
   1019   // Shifting by 1 is handled efficiently.
   1020   void sar(Register dst, Immediate shift_amount) {  // 64-bit arithmetic right shift.
   1021     shift(dst, shift_amount, 0x7);  // Subcode /7 = SAR.
   1022   }
   1023 
   1024   // Shifts dst right, duplicating sign bit, by shift_amount bits.
   1025   // Shifting by 1 is handled efficiently.
   1026   void sarl(Register dst, Immediate shift_amount) {  // 32-bit arithmetic right shift.
   1027     shift_32(dst, shift_amount, 0x7);  // Subcode /7 = SAR.
   1028   }
   1029 
   1030   // Shifts dst right, duplicating sign bit, by cl % 64 bits.
   1031   void sar_cl(Register dst) {  // 64-bit SAR with the count taken from cl.
   1032     shift(dst, 0x7);  // Subcode /7 = SAR.
   1033   }
   1034 
   1035   // Shifts dst right, duplicating sign bit, by cl % 32 bits.
   1036   void sarl_cl(Register dst) {  // 32-bit SAR by cl; the CPU masks the count to 5 bits.
   1037     shift_32(dst, 0x7);  // Subcode /7 = SAR.
   1038   }
   1039 
   1040   void shl(Register dst, Immediate shift_amount) {  // 64-bit left shift by an immediate.
   1041     shift(dst, shift_amount, 0x4);  // Subcode /4 = SHL.
   1042   }
   1043 
   1044   void shl_cl(Register dst) {  // 64-bit left shift with the count taken from cl.
   1045     shift(dst, 0x4);  // Subcode /4 = SHL.
   1046   }
   1047 
   1048   void shll_cl(Register dst) {  // 32-bit left shift by cl; the CPU masks the count to 5 bits.
   1049     shift_32(dst, 0x4);  // Subcode /4 = SHL.
   1050   }
   1051 
   1052   void shll(Register dst, Immediate shift_amount) {  // 32-bit left shift by an immediate.
   1053     shift_32(dst, shift_amount, 0x4);  // Subcode /4 = SHL.
   1054   }
   1055 
   1056   void shr(Register dst, Immediate shift_amount) {  // 64-bit logical right shift by an immediate.
   1057     shift(dst, shift_amount, 0x5);  // Subcode /5 = SHR.
   1058   }
   1059 
   1060   void shr_cl(Register dst) {  // 64-bit logical right shift with the count taken from cl.
   1061     shift(dst, 0x5);  // Subcode /5 = SHR.
   1062   }
   1063 
   1064   void shrl_cl(Register dst) {  // 32-bit logical right shift by cl; count masked to 5 bits.
   1065     shift_32(dst, 0x5);  // Subcode /5 = SHR.
   1066   }
   1067 
   1068   void shrl(Register dst, Immediate shift_amount) {  // 32-bit logical right shift by an immediate.
   1069     shift_32(dst, shift_amount, 0x5);  // Subcode /5 = SHR.
   1070   }
   1071 
   1072   void store_rax(void* dst, RelocInfo::Mode mode);
   1073   void store_rax(ExternalReference ref);
   1074 
   1075   void subq(Register dst, Register src) {  // 64-bit subtract: dst -= src.
   1076     arithmetic_op(0x2B, dst, src);  // 0x2B = SUB r, r/m.
   1077   }
   1078 
   1079   void subq(Register dst, const Operand& src) {  // 64-bit subtract: dst -= [src].
   1080     arithmetic_op(0x2B, dst, src);  // 0x2B = SUB r, r/m.
   1081   }
   1082 
   1083   void subq(const Operand& dst, Register src) {  // 64-bit subtract: [dst] -= src.
   1084     arithmetic_op(0x29, src, dst);  // 0x29 = SUB r/m, r; src is the reg field.
   1085   }
   1086 
   1087   void subq(Register dst, Immediate src) {  // 64-bit subtract: dst -= imm.
   1088     immediate_arithmetic_op(0x5, dst, src);  // Subcode /5 = SUB.
   1089   }
   1090 
   1091   void subq(const Operand& dst, Immediate src) {  // 64-bit subtract: [dst] -= imm.
   1092     immediate_arithmetic_op(0x5, dst, src);  // Subcode /5 = SUB.
   1093   }
   1094 
   1095   void subl(Register dst, Register src) {  // 32-bit subtract: dst -= src.
   1096     arithmetic_op_32(0x2B, dst, src);  // 0x2B = SUB r32, r/m32.
   1097   }
   1098 
   1099   void subl(Register dst, const Operand& src) {  // 32-bit subtract: dst -= [src].
   1100     arithmetic_op_32(0x2B, dst, src);  // 0x2B = SUB r32, r/m32.
   1101   }
   1102 
   1103   void subl(const Operand& dst, Immediate src) {  // 32-bit subtract: [dst] -= imm.
   1104     immediate_arithmetic_op_32(0x5, dst, src);  // Subcode /5 = SUB.
   1105   }
   1106 
   1107   void subl(Register dst, Immediate src) {  // 32-bit subtract: dst -= imm.
   1108     immediate_arithmetic_op_32(0x5, dst, src);  // Subcode /5 = SUB.
   1109   }
   1110 
   1111   void subb(Register dst, Immediate src) {  // 8-bit subtract: dst -= imm.
   1112     immediate_arithmetic_op_8(0x5, dst, src);  // Subcode /5 = SUB.
   1113   }
   1114 
   1115   void testb(Register dst, Register src);
   1116   void testb(Register reg, Immediate mask);
   1117   void testb(const Operand& op, Immediate mask);
   1118   void testb(const Operand& op, Register reg);
   1119   void testl(Register dst, Register src);
   1120   void testl(Register reg, Immediate mask);
   1121   void testl(const Operand& op, Immediate mask);
   1122   void testq(const Operand& op, Register reg);
   1123   void testq(Register dst, Register src);
   1124   void testq(Register dst, Immediate mask);
   1125 
   1126   void xor_(Register dst, Register src) {  // dst ^= src (0x33 = XOR r, r/m).
   1127     if (dst.code() != src.code()) {
   1128       arithmetic_op(0x33, dst, src);  // Distinct registers: full-width XOR.
   1129     } else {
   1130       arithmetic_op_32(0x33, dst, src);  // Self-XOR clears dst; the 32-bit form zero-extends.
   1131     }
   1132   }
   1133 
   1134   void xorl(Register dst, Register src) {  // 32-bit XOR: dst ^= src.
   1135     arithmetic_op_32(0x33, dst, src);  // 0x33 = XOR r32, r/m32.
   1136   }
   1137 
   1138   void xorl(Register dst, const Operand& src) {  // 32-bit XOR: dst ^= [src].
   1139     arithmetic_op_32(0x33, dst, src);  // 0x33 = XOR r32, r/m32.
   1140   }
   1141 
   1142   void xorl(Register dst, Immediate src) {  // 32-bit XOR: dst ^= imm.
   1143     immediate_arithmetic_op_32(0x6, dst, src);  // Subcode /6 = XOR.
   1144   }
   1145 
   1146   void xorl(const Operand& dst, Immediate src) {  // 32-bit XOR: [dst] ^= imm.
   1147     immediate_arithmetic_op_32(0x6, dst, src);  // Subcode /6 = XOR.
   1148   }
   1149 
   1150   void xor_(Register dst, const Operand& src) {  // 64-bit XOR: dst ^= [src].
   1151     arithmetic_op(0x33, dst, src);  // 0x33 = XOR r, r/m.
   1152   }
   1153 
   1154   void xor_(const Operand& dst, Register src) {  // 64-bit XOR: [dst] ^= src.
   1155     arithmetic_op(0x31, src, dst);  // 0x31 = XOR r/m, r; src is the reg field.
   1156   }
   1157 
   1158   void xor_(Register dst, Immediate src) {  // 64-bit XOR: dst ^= imm.
   1159     immediate_arithmetic_op(0x6, dst, src);  // Subcode /6 = XOR.
   1160   }
   1161 
   1162   void xor_(const Operand& dst, Immediate src) {  // 64-bit XOR: [dst] ^= imm.
   1163     immediate_arithmetic_op(0x6, dst, src);  // Subcode /6 = XOR.
   1164   }
   1165 
   1166   // Bit operations.
   1167   void bt(const Operand& dst, Register src);
   1168   void bts(const Operand& dst, Register src);
   1169 
   1170   // Miscellaneous
   1171   void clc();
   1172   void cld();
   1173   void cpuid();
   1174   void hlt();
   1175   void int3();
   1176   void nop();
   1177   void rdtsc();
   1178   void ret(int imm16);
   1179   void setcc(Condition cc, Register reg);
   1180 
   1181   // Label operations & relative jumps (PPUM Appendix D)
   1182   //
   1183   // Takes a branch opcode (cc) and a label (L) and generates
   1184   // either a backward branch or a forward branch and links it
   1185   // to the label fixup chain. Usage:
   1186   //
   1187   // Label L;    // unbound label
   1188   // j(cc, &L);  // forward branch to unbound label
   1189   // bind(&L);   // bind label to the current pc
   1190   // j(cc, &L);  // backward branch to bound label
   1191   // bind(&L);   // illegal: a label may be bound only once
   1192   //
   1193   // Note: The same Label can be used for forward and backward branches
   1194   // but it may be bound only once.
   1195 
   1196   void bind(Label* L);  // binds an unbound label L to the current code position
   1197 
   1198   // Calls
   1199   // Call near relative 32-bit displacement, relative to next instruction.
   1200   void call(Label* L);
   1201   void call(Handle<Code> target,
   1202             RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
   1203             unsigned ast_id = kNoASTId);
   1204 
   1205   // Calls directly to the given address using a relative offset.
   1206   // Should only ever be used in Code objects for calls within the
   1207   // same Code object. Should not be used when generating new code (use labels),
   1208   // but only when patching existing code.
   1209   void call(Address target);
   1210 
   1211   // Call near absolute indirect, address in register
   1212   void call(Register adr);
   1213 
   1214   // Call near indirect
   1215   void call(const Operand& operand);
   1216 
   1217   // Jumps
   1218   // Jump short or near relative.
   1219   // Use a 32-bit signed displacement.
   1220   // Unconditional jump to L
   1221   void jmp(Label* L, Label::Distance distance = Label::kFar);
   1222   void jmp(Handle<Code> target, RelocInfo::Mode rmode);
   1223 
   1224   // Jump near absolute indirect (r64)
   1225   void jmp(Register adr);
   1226 
   1227   // Jump near absolute indirect (m64)
   1228   void jmp(const Operand& src);
   1229 
   1230   // Conditional jumps
   1231   void j(Condition cc,
   1232          Label* L,
   1233          Label::Distance distance = Label::kFar);
   1234   void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
   1235 
   1236   // Floating-point operations
   1237   void fld(int i);
   1238 
   1239   void fld1();
   1240   void fldz();
   1241   void fldpi();
   1242   void fldln2();
   1243 
   1244   void fld_s(const Operand& adr);
   1245   void fld_d(const Operand& adr);
   1246 
   1247   void fstp_s(const Operand& adr);
   1248   void fstp_d(const Operand& adr);
   1249   void fstp(int index);
   1250 
   1251   void fild_s(const Operand& adr);
   1252   void fild_d(const Operand& adr);
   1253 
   1254   void fist_s(const Operand& adr);
   1255 
   1256   void fistp_s(const Operand& adr);
   1257   void fistp_d(const Operand& adr);
   1258 
   1259   void fisttp_s(const Operand& adr);
   1260   void fisttp_d(const Operand& adr);
   1261 
   1262   void fabs();
   1263   void fchs();
   1264 
   1265   void fadd(int i);
   1266   void fsub(int i);
   1267   void fmul(int i);
   1268   void fdiv(int i);
   1269 
   1270   void fisub_s(const Operand& adr);
   1271 
   1272   void faddp(int i = 1);
   1273   void fsubp(int i = 1);
   1274   void fsubrp(int i = 1);
   1275   void fmulp(int i = 1);
   1276   void fdivp(int i = 1);
   1277   void fprem();
   1278   void fprem1();
   1279 
   1280   void fxch(int i = 1);
   1281   void fincstp();
   1282   void ffree(int i = 0);
   1283 
   1284   void ftst();
   1285   void fucomp(int i);
   1286   void fucompp();
   1287   void fucomi(int i);
   1288   void fucomip();
   1289 
   1290   void fcompp();
   1291   void fnstsw_ax();
   1292   void fwait();
   1293   void fnclex();
   1294 
   1295   void fsin();
   1296   void fcos();
   1297   void fptan();
   1298   void fyl2x();
   1299   void f2xm1();
   1300   void fscale();
   1301   void fninit();
   1302 
   1303   void frndint();
   1304 
   1305   void sahf();
   1306 
   1307   // SSE2 instructions
   1308   void movd(XMMRegister dst, Register src);
   1309   void movd(Register dst, XMMRegister src);
   1310   void movq(XMMRegister dst, Register src);
   1311   void movq(Register dst, XMMRegister src);
   1312   void movq(XMMRegister dst, XMMRegister src);
   1313   void extractps(Register dst, XMMRegister src, byte imm8);
   1314 
   1315   // Don't use this unless it's important to keep the
   1316   // top half of the destination register unchanged.
   1317   // Use movaps when moving double values and movq for integer
   1318   // values in xmm registers.
   1319   void movsd(XMMRegister dst, XMMRegister src);
   1320 
   1321   void movsd(const Operand& dst, XMMRegister src);
   1322   void movsd(XMMRegister dst, const Operand& src);
   1323 
   1324   void movdqa(const Operand& dst, XMMRegister src);
   1325   void movdqa(XMMRegister dst, const Operand& src);
   1326 
   1327   void movapd(XMMRegister dst, XMMRegister src);
   1328   void movaps(XMMRegister dst, XMMRegister src);
   1329 
   1330   void movss(XMMRegister dst, const Operand& src);
   1331   void movss(const Operand& dst, XMMRegister src);
   1332 
   1333   void cvttss2si(Register dst, const Operand& src);
   1334   void cvttss2si(Register dst, XMMRegister src);
   1335   void cvttsd2si(Register dst, const Operand& src);
   1336   void cvttsd2si(Register dst, XMMRegister src);
   1337   void cvttsd2siq(Register dst, XMMRegister src);
   1338 
   1339   void cvtlsi2sd(XMMRegister dst, const Operand& src);
   1340   void cvtlsi2sd(XMMRegister dst, Register src);
   1341   void cvtqsi2sd(XMMRegister dst, const Operand& src);
   1342   void cvtqsi2sd(XMMRegister dst, Register src);
   1343 
   1344   void cvtlsi2ss(XMMRegister dst, Register src);
   1345 
   1346   void cvtss2sd(XMMRegister dst, XMMRegister src);
   1347   void cvtss2sd(XMMRegister dst, const Operand& src);
   1348   void cvtsd2ss(XMMRegister dst, XMMRegister src);
   1349 
   1350   void cvtsd2si(Register dst, XMMRegister src);
   1351   void cvtsd2siq(Register dst, XMMRegister src);
   1352 
   1353   void addsd(XMMRegister dst, XMMRegister src);
   1354   void subsd(XMMRegister dst, XMMRegister src);
   1355   void mulsd(XMMRegister dst, XMMRegister src);
   1356   void divsd(XMMRegister dst, XMMRegister src);
   1357 
   1358   void andpd(XMMRegister dst, XMMRegister src);
   1359   void orpd(XMMRegister dst, XMMRegister src);
   1360   void xorpd(XMMRegister dst, XMMRegister src);
   1361   void xorps(XMMRegister dst, XMMRegister src);
   1362   void sqrtsd(XMMRegister dst, XMMRegister src);
   1363 
   1364   void ucomisd(XMMRegister dst, XMMRegister src);
   1365   void ucomisd(XMMRegister dst, const Operand& src);
   1366 
   1367   enum RoundingMode {  // Values match the ROUNDSD imm8 rounding-control field (bits 1:0).
   1368     kRoundToNearest = 0x0,  // Round to nearest, ties to even.
   1369     kRoundDown      = 0x1,  // Round toward negative infinity.
   1370     kRoundUp        = 0x2,  // Round toward positive infinity.
   1371     kRoundToZero    = 0x3   // Truncate toward zero.
   1372   };
   1373 
   1374   void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
   1375 
   1376   void movmskpd(Register dst, XMMRegister src);
   1377 
   1378   // The first argument is the reg field, the second argument is the r/m field.
   1379   void emit_sse_operand(XMMRegister dst, XMMRegister src);
   1380   void emit_sse_operand(XMMRegister reg, const Operand& adr);
   1381   void emit_sse_operand(XMMRegister dst, Register src);
   1382   void emit_sse_operand(Register dst, XMMRegister src);
   1383 
   1384   // Debugging
   1385   void Print();
   1386 
   1387   // Check the code size generated from label to here.
   1388   int SizeOfCodeGeneratedSince(Label* label) {
   1389     return pc_offset() - label->pos();  // Current offset minus label's position; assumes label is bound - TODO confirm.
   1390   }
   1391 
   1392   // Mark address of the ExitJSFrame code.
   1393   void RecordJSReturn();
   1394 
   1395   // Mark address of a debug break slot.
   1396   void RecordDebugBreakSlot();
   1397 
   1398   // Record a comment relocation entry that can be used by a disassembler.
   1399   // Use --code-comments to enable.
   1400   void RecordComment(const char* msg, bool force = false);
   1401 
   1402   // Writes a single byte (db) or 32-bit value (dd) of data in the code stream.
   1403   // Used for inline tables, e.g., jump-tables.
   1404   void db(uint8_t data);
   1405   void dd(uint32_t data);
   1406 
   1407   int pc_offset() const { return static_cast<int>(pc_ - buffer_); }  // Byte offset of pc_ from buffer_.
   1408 
   1409   PositionsRecorder* positions_recorder() { return &positions_recorder_; }  // Pointer to the embedded recorder.
   1410 
   1411   // Check if there is less than kGap bytes available in the buffer.
   1412   // If this is the case, we need to grow the buffer before emitting
   1413   // an instruction or relocation information.
   1414   inline bool buffer_overflow() const {
   1415     return pc_ >= reloc_info_writer.pos() - kGap;  // True once pc_ is within kGap bytes of the reloc writer.
   1416   }
   1417 
   1418   // Get the number of bytes available in the buffer.
   1419   inline int available_space() const {
   1420     return static_cast<int>(reloc_info_writer.pos() - pc_);  // Bytes between pc_ and the reloc writer position.
   1421   }
   1422 
   1423   static bool IsNop(Address addr);
   1424 
   1425   // Avoid overflows for displacements etc.
   1426   static const int kMaximalBufferSize = 512*MB;
   1427   static const int kMinimalBufferSize = 4*KB;
   1428 
   1429   byte byte_at(int pos)  { return buffer_[pos]; }  // Reads the byte at buffer offset pos; no bounds check.
   1430   void set_byte_at(int pos, byte value) { buffer_[pos] = value; }  // Overwrites the byte at offset pos; no bounds check.
   1431 
   1432  protected:
   1433   bool emit_debug_code() const { return emit_debug_code_; }  // Accessor for the emit_debug_code_ flag.
   1434 
   1435  private:
   1436   byte* addr_at(int pos)  { return buffer_ + pos; }  // Address of buffer offset pos.
   1437   uint32_t long_at(int pos)  {  // Reads the 32-bit value stored at buffer offset pos.
   1438     return *reinterpret_cast<uint32_t*>(addr_at(pos));  // Raw load; no alignment or bounds check.
   1439   }
   1440   void long_at_put(int pos, uint32_t x)  {  // Stores the 32-bit value x at buffer offset pos.
   1441     *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;  // Raw store; no alignment or bounds check.
   1442   }
   1443 
   1444   // code emission
   1445   void GrowBuffer();
   1446 
   1447   void emit(byte x) { *pc_++ = x; }  // Writes one byte at pc_ and advances it; no space check here.
   1448   inline void emitl(uint32_t x);
   1449   inline void emitq(uint64_t x, RelocInfo::Mode rmode);
   1450   inline void emitw(uint16_t x);
   1451   inline void emit_code_target(Handle<Code> target,
   1452                                RelocInfo::Mode rmode,
   1453                                unsigned ast_id = kNoASTId);
   1454   void emit(Immediate x) { emitl(x.value_); }  // Emits the immediate's value_ through emitl.
   1455 
   1456   // Emits a REX prefix that encodes a 64-bit operand size and
   1457   // the top bit of both register codes.
   1458   // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
   1459   // REX.W is set.
   1460   inline void emit_rex_64(XMMRegister reg, Register rm_reg);
   1461   inline void emit_rex_64(Register reg, XMMRegister rm_reg);
   1462   inline void emit_rex_64(Register reg, Register rm_reg);
   1463 
   1464   // Emits a REX prefix that encodes a 64-bit operand size and
   1465   // the top bit of the destination, index, and base register codes.
   1466   // The high bit of reg is used for REX.R, the high bit of op's base
   1467   // register is used for REX.B, and the high bit of op's index register
   1468   // is used for REX.X.  REX.W is set.
   1469   inline void emit_rex_64(Register reg, const Operand& op);
   1470   inline void emit_rex_64(XMMRegister reg, const Operand& op);
   1471 
   1472   // Emits a REX prefix that encodes a 64-bit operand size and
   1473   // the top bit of the register code.
   1474   // The high bit of register is used for REX.B.
   1475   // REX.W is set and REX.R and REX.X are clear.
   1476   inline void emit_rex_64(Register rm_reg);
   1477 
   1478   // Emits a REX prefix that encodes a 64-bit operand size and
   1479   // the top bit of the index and base register codes.
   1480   // The high bit of op's base register is used for REX.B, and the high
   1481   // bit of op's index register is used for REX.X.
   1482   // REX.W is set and REX.R clear.
   1483   inline void emit_rex_64(const Operand& op);
   1484 
   1485   // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
   1486   void emit_rex_64() { emit(0x48); }  // 0x48 = 0100 1000b: REX with only the W bit set.
   1487 
   1488   // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
   1489   // REX.W is clear.
   1490   inline void emit_rex_32(Register reg, Register rm_reg);
   1491 
   1492   // The high bit of reg is used for REX.R, the high bit of op's base
   1493   // register is used for REX.B, and the high bit of op's index register
   1494   // is used for REX.X.  REX.W is cleared.
   1495   inline void emit_rex_32(Register reg, const Operand& op);
   1496 
   1497   // High bit of rm_reg goes to REX.B.
   1498   // REX.W, REX.R and REX.X are clear.
   1499   inline void emit_rex_32(Register rm_reg);
   1500 
   1501   // High bit of base goes to REX.B and high bit of index to REX.X.
   1502   // REX.W and REX.R are clear.
   1503   inline void emit_rex_32(const Operand& op);
   1504 
   1505   // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
   1506   // REX.W is cleared.  If no REX bits are set, no byte is emitted.
   1507   inline void emit_optional_rex_32(Register reg, Register rm_reg);
   1508 
   1509   // The high bit of reg is used for REX.R, the high bit of op's base
   1510   // register is used for REX.B, and the high bit of op's index register
   1511   // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
   1512   // is emitted.
   1513   inline void emit_optional_rex_32(Register reg, const Operand& op);
   1514 
   1515   // As for emit_optional_rex_32(Register, Register), except that
   1516   // the registers are XMM registers.
   1517   inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);
   1518 
   1519   // As for emit_optional_rex_32(Register, Register), except that
   1520   // one of the registers is an XMM register.
   1521   inline void emit_optional_rex_32(XMMRegister reg, Register base);
   1522 
   1523   // As for emit_optional_rex_32(Register, Register), except that
   1524   // one of the registers is an XMM register.
   1525   inline void emit_optional_rex_32(Register reg, XMMRegister base);
   1526 
   1527   // As for emit_optional_rex_32(Register, const Operand&), except that
   1528   // the register is an XMM register.
   1529   inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);
   1530 
   1531   // Optionally do as emit_rex_32(Register) if the register number has
   1532   // the high bit set.
   1533   inline void emit_optional_rex_32(Register rm_reg);
   1534 
   1535   // Optionally do as emit_rex_32(const Operand&) if the operand register
   1536   // numbers have a high bit set.
   1537   inline void emit_optional_rex_32(const Operand& op);
   1538 
   1539 
   1540   // Emit the ModR/M byte, and optionally the SIB byte and
   1541   // 1- or 4-byte offset for a memory operand.  Also encodes
   1542   // the second operand of the operation, a register or operation
   1543   // subcode, into the reg field of the ModR/M byte.
   1544   void emit_operand(Register reg, const Operand& adr) {
   1545     emit_operand(reg.low_bits(), adr);  // Forwards reg's low_bits() to the int overload.
   1546   }
   1547 
   1548   // Emit the ModR/M byte, and optionally the SIB byte and
   1549   // 1- or 4-byte offset for a memory operand.  Also used to encode
   1550   // a three-bit opcode extension into the ModR/M byte.
   1551   void emit_operand(int rm, const Operand& adr);
   1552 
   1553   // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
   1554   void emit_modrm(Register reg, Register rm_reg) {
   1555     emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());  // mod = 11b (register-direct), reg in bits 5..3, r/m in bits 2..0.
   1556   }
   1557 
   1558   // Emit a ModR/M byte with an operation subcode in the reg field and
   1559   // a register in the rm_reg field.
   1560   void emit_modrm(int code, Register rm_reg) {
   1561     ASSERT(is_uint3(code));  // The subcode must fit the 3-bit reg field.
   1562     emit(0xC0 | code << 3 | rm_reg.low_bits());  // mod = 11b, subcode in bits 5..3, r/m in bits 2..0.
   1563   }
   1564 
   1565   // Emit the code-object-relative offset of the label's position
   1566   inline void emit_code_relative_offset(Label* label);
   1567 
   1568   // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
   1569   // AND, OR, XOR, or CMP.  The encodings of these operations are all
   1570   // similar, differing just in the opcode or in the reg field of the
   1571   // ModR/M byte.
   1572   void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
   1573   void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
   1574   void arithmetic_op_32(byte opcode, Register reg, Register rm_reg);
   1575   void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg);
   1576   void arithmetic_op(byte opcode, Register reg, Register rm_reg);
   1577   void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg);
   1578   void immediate_arithmetic_op(byte subcode, Register dst, Immediate src);
   1579   void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src);
   1580   // Operate on a byte in memory or register.
   1581   void immediate_arithmetic_op_8(byte subcode,
   1582                                  Register dst,
   1583                                  Immediate src);
   1584   void immediate_arithmetic_op_8(byte subcode,
   1585                                  const Operand& dst,
   1586                                  Immediate src);
   1587   // Operate on a word in memory or register.
   1588   void immediate_arithmetic_op_16(byte subcode,
   1589                                   Register dst,
   1590                                   Immediate src);
   1591   void immediate_arithmetic_op_16(byte subcode,
   1592                                   const Operand& dst,
   1593                                   Immediate src);
   1594   // Operate on a 32-bit word in memory or register.
   1595   void immediate_arithmetic_op_32(byte subcode,
   1596                                   Register dst,
   1597                                   Immediate src);
   1598   void immediate_arithmetic_op_32(byte subcode,
   1599                                   const Operand& dst,
   1600                                   Immediate src);
   1601 
   1602   // Emit machine code for a shift operation.
          // These two overloads take the shift count as the immediate
          // `shift_amount`.
          // NOTE(review): `subcode` presumably selects the kind of shift/rotate,
          // and the _32 variants presumably use a 32-bit operand size -- confirm
          // against the definitions in the .cc file.
   1603   void shift(Register dst, Immediate shift_amount, int subcode);
   1604   void shift_32(Register dst, Immediate shift_amount, int subcode);
   1605   // Shift dst by cl % 64 bits.
          // These two overloads take no explicit count; per the comment above the
          // count comes from the cl register.
          // NOTE(review): if shift_32 encodes a 32-bit operand-size shift, the
          // hardware masks the count to 5 bits (cl % 32) rather than cl % 64 --
          // confirm and adjust the comment for shift_32 if so.
   1606   void shift(Register dst, int subcode);
   1607   void shift_32(Register dst, int subcode);
   1608 
          // NOTE(review): undocumented in this header.  The name suggests an
          // emitter for x87 floating-point arithmetic that takes two encoding
          // bytes (`b1`, `b2`) and an index `i` -- confirm against the definition.
   1609   void emit_farith(int b1, int b2, int i);
   1610 
   1611   // labels
   1612   // void print(Label* L);
          // (The print() line above is a commented-out declaration; it is not part
          // of the class.)
          // Binds label `L` to position `pos`.
          // NOTE(review): `pos` is presumably a byte offset into the code buffer,
          // and pending uses of `L` are presumably resolved here -- confirm
          // against the definition in the .cc file.
   1613   void bind_to(Label* L, int pos);
   1614 
   1615   // record reloc info for current pc_
          // `rmode` is the relocation mode to record.  `data` is an optional extra
          // value that defaults to 0 when the caller omits it.
   1616   void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
   1617 
          // Friend classes may use the private helpers and state of this class.
          // EnsureSpace (defined after this class in this header) calls
          // buffer_overflow(), GrowBuffer() and available_space() and reads kGap.
   1618   friend class CodePatcher;
   1619   friend class EnsureSpace;
   1620   friend class RegExpMacroAssemblerX64;
   1621 
   1622   // Code buffer:
   1623   // The buffer into which code and relocation info are generated.
   1624   byte* buffer_;
          // Size of buffer_.  NOTE(review): presumably in bytes -- confirm.
   1625   int buffer_size_;
   1626   // True if the assembler owns the buffer, false if buffer is external.
   1627   bool own_buffer_;
   1628 
   1629   // code generation
   1630   byte* pc_;  // the program counter; moves forward
          // Writer object for relocation info.  NOTE(review): unlike its sibling
          // members this name has no trailing underscore; a rename would touch
          // every user, so the inconsistency is only flagged here.
   1631   RelocInfoWriter reloc_info_writer;
   1632 
          // List of Code handles kept by the assembler.  NOTE(review): how entries
          // are added and referenced is not visible in this part of the header --
          // see the definitions that use code_targets_.
   1633   List< Handle<Code> > code_targets_;
   1634 
          // Embedded PositionsRecorder instance; that class is also declared a
          // friend at the end of this section.
   1635   PositionsRecorder positions_recorder_;
   1636 
          // NOTE(review): presumably controls whether extra debug-only checks are
          // emitted into the generated code -- confirm against its accessors.
   1637   bool emit_debug_code_;
   1638 
   1639   friend class PositionsRecorder;
   1640 };
   1641 
   1642 
   1643 // Helper class that ensures that there is enough space for generating
   1644 // instructions and relocation information.  The constructor makes
   1645 // sure that there is enough space and (in debug mode) the destructor
   1646 // checks that we did not generate too much.
        //
        // Concretely: the constructor calls GrowBuffer() on the assembler when
        // buffer_overflow() reports true.  In DEBUG builds it also samples
        // available_space(); the destructor samples it again and asserts that the
        // decrease stays below the assembler's kGap.  In non-DEBUG builds both the
        // destructor and the space_before_ member are compiled out.
   1647 class EnsureSpace BASE_EMBEDDED {
   1648  public:
          // Stores `assembler` and grows its buffer up front if needed.  The
          // pointer is dereferenced again in the DEBUG destructor, so the
          // assembler must outlive this object.
   1649   explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
   1650     if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
   1651 #ifdef DEBUG
            // Baseline for the destructor's check; taken after any growth above so
            // that the growth itself is not counted.
   1652     space_before_ = assembler_->available_space();
   1653 #endif
   1654   }
   1655 
   1656 #ifdef DEBUG
          // Checks how much space was consumed while this object was alive.
   1657   ~EnsureSpace() {
            // Space consumed since construction = space then - space now.
   1658     int bytes_generated = space_before_ - assembler_->available_space();
            // Strictly less than kGap: consuming exactly kGap bytes also fails.
   1659     ASSERT(bytes_generated < assembler_->kGap);
   1660   }
   1661 #endif
   1662 
   1663  private:
   1664   Assembler* assembler_;  // Assembler being guarded; never freed here.
   1665 #ifdef DEBUG
   1666   int space_before_;  // available_space() sampled by the constructor.
   1667 #endif
   1668 };
   1669 
   1670 } }  // namespace v8::internal
   1671 
   1672 #endif  // V8_X64_ASSEMBLER_X64_H_
   1673