1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 2 // All Rights Reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // - Redistributions of source code must retain the above copyright notice, 9 // this list of conditions and the following disclaimer. 10 // 11 // - Redistribution in binary form must reproduce the above copyright 12 // notice, this list of conditions and the following disclaimer in the 13 // documentation and/or other materials provided with the distribution. 14 // 15 // - Neither the name of Sun Microsystems or the names of contributors may 16 // be used to endorse or promote products derived from this software without 17 // specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // The original source code covered by the above license above has been 32 // modified significantly by Google Inc. 33 // Copyright 2012 the V8 project authors. All rights reserved. 34 35 // A lightweight X64 Assembler. 
#ifndef V8_X64_ASSEMBLER_X64_H_
#define V8_X64_ASSEMBLER_X64_H_

#include <deque>

#include "src/assembler.h"
#include "src/x64/sse-instr.h"

namespace v8 {
namespace internal {

// Utility functions

// X-macro listing all sixteen general-purpose registers. Register::Code is
// generated from this list, so the position of a name here is its numeric
// register code (rax == 0 ... r15 == 15). Do not reorder.
#define GENERAL_REGISTERS(V) \
  V(rax)                     \
  V(rcx)                     \
  V(rdx)                     \
  V(rbx)                     \
  V(rsp)                     \
  V(rbp)                     \
  V(rsi)                     \
  V(rdi)                     \
  V(r8)                      \
  V(r9)                      \
  V(r10)                     \
  V(r11)                     \
  V(r12)                     \
  V(r13)                     \
  V(r14)                     \
  V(r15)

// Subset of GENERAL_REGISTERS; rsp, rbp, r10 and r13 are left out, and the
// order differs from the code order above (rax, rbx, rdx, rcx, ...).
// NOTE(review): presumably the omitted registers are reserved (stack/frame
// pointer, scratch, root) -- confirm against the macro assembler.
#define ALLOCATABLE_GENERAL_REGISTERS(V) \
  V(rax)                                 \
  V(rbx)                                 \
  V(rdx)                                 \
  V(rcx)                                 \
  V(rsi)                                 \
  V(rdi)                                 \
  V(r8)                                  \
  V(r9)                                  \
  V(r11)                                 \
  V(r12)                                 \
  V(r14)                                 \
  V(r15)

// The length of pushq(rbp), movp(rbp, rsp), Push(rsi) and Push(rdi):
// 6 bytes when pointers are 64 bits wide, 17 bytes otherwise.
static const int kNoCodeAgeSequenceLength = kPointerSize == kInt64Size ? 6 : 17;

// CPU Registers.
//
// 1) We would prefer to use an enum, but enum values are assignment-
// compatible with int, which has caused code-generation bugs.
//
// 2) We would prefer to use a class instead of a struct but we don't like
// the register initialization to depend on the particular initialization
// order (which appears to be different on OS X, Linux, and Windows for the
// installed versions of C++ we tried). Using a struct permits C-style
// "initialization". Also, the Register objects cannot be const as this
// forces initialization stubs in MSVC, making us dependent on initialization
// order.
//
// 3) By not using an enum, we are possibly preventing the compiler from
// doing certain constant folds, which may significantly reduce the
// code generated for some assembly instructions (because they boil down
// to a few constants). If this is a problem, we could change the code
// such that we use an enum in optimized mode, and the struct in debug
// mode. This way we get the compile-time error checking in debug mode
// and best performance in optimized code.
105 // 106 struct Register { 107 enum Code { 108 #define REGISTER_CODE(R) kCode_##R, 109 GENERAL_REGISTERS(REGISTER_CODE) 110 #undef REGISTER_CODE 111 kAfterLast, 112 kCode_no_reg = -1 113 }; 114 115 static const int kNumRegisters = Code::kAfterLast; 116 117 static Register from_code(int code) { 118 DCHECK(code >= 0); 119 DCHECK(code < kNumRegisters); 120 Register r = {code}; 121 return r; 122 } 123 bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; } 124 bool is(Register reg) const { return reg_code == reg.reg_code; } 125 int code() const { 126 DCHECK(is_valid()); 127 return reg_code; 128 } 129 int bit() const { 130 DCHECK(is_valid()); 131 return 1 << reg_code; 132 } 133 134 bool is_byte_register() const { return reg_code <= 3; } 135 // Return the high bit of the register code as a 0 or 1. Used often 136 // when constructing the REX prefix byte. 137 int high_bit() const { return reg_code >> 3; } 138 // Return the 3 low bits of the register code. Used when encoding registers 139 // in modR/M, SIB, and opcode bytes. 140 int low_bits() const { return reg_code & 0x7; } 141 142 // Unfortunately we can't make this private in a struct when initializing 143 // by assignment. 
144 int reg_code; 145 }; 146 147 148 #define DECLARE_REGISTER(R) const Register R = {Register::kCode_##R}; 149 GENERAL_REGISTERS(DECLARE_REGISTER) 150 #undef DECLARE_REGISTER 151 const Register no_reg = {Register::kCode_no_reg}; 152 153 154 #ifdef _WIN64 155 // Windows calling convention 156 const Register arg_reg_1 = {Register::kCode_rcx}; 157 const Register arg_reg_2 = {Register::kCode_rdx}; 158 const Register arg_reg_3 = {Register::kCode_r8}; 159 const Register arg_reg_4 = {Register::kCode_r9}; 160 #else 161 // AMD64 calling convention 162 const Register arg_reg_1 = {Register::kCode_rdi}; 163 const Register arg_reg_2 = {Register::kCode_rsi}; 164 const Register arg_reg_3 = {Register::kCode_rdx}; 165 const Register arg_reg_4 = {Register::kCode_rcx}; 166 #endif // _WIN64 167 168 169 #define DOUBLE_REGISTERS(V) \ 170 V(xmm0) \ 171 V(xmm1) \ 172 V(xmm2) \ 173 V(xmm3) \ 174 V(xmm4) \ 175 V(xmm5) \ 176 V(xmm6) \ 177 V(xmm7) \ 178 V(xmm8) \ 179 V(xmm9) \ 180 V(xmm10) \ 181 V(xmm11) \ 182 V(xmm12) \ 183 V(xmm13) \ 184 V(xmm14) \ 185 V(xmm15) 186 187 #define FLOAT_REGISTERS DOUBLE_REGISTERS 188 #define SIMD128_REGISTERS DOUBLE_REGISTERS 189 190 #define ALLOCATABLE_DOUBLE_REGISTERS(V) \ 191 V(xmm0) \ 192 V(xmm1) \ 193 V(xmm2) \ 194 V(xmm3) \ 195 V(xmm4) \ 196 V(xmm5) \ 197 V(xmm6) \ 198 V(xmm7) \ 199 V(xmm8) \ 200 V(xmm9) \ 201 V(xmm10) \ 202 V(xmm11) \ 203 V(xmm12) \ 204 V(xmm13) \ 205 V(xmm14) 206 207 static const bool kSimpleFPAliasing = true; 208 static const bool kSimdMaskRegisters = false; 209 210 struct XMMRegister { 211 enum Code { 212 #define REGISTER_CODE(R) kCode_##R, 213 DOUBLE_REGISTERS(REGISTER_CODE) 214 #undef REGISTER_CODE 215 kAfterLast, 216 kCode_no_reg = -1 217 }; 218 219 static const int kMaxNumRegisters = Code::kAfterLast; 220 221 static XMMRegister from_code(int code) { 222 XMMRegister result = {code}; 223 return result; 224 } 225 226 bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; } 227 bool is(XMMRegister reg) const { 
return reg_code == reg.reg_code; } 228 int code() const { 229 DCHECK(is_valid()); 230 return reg_code; 231 } 232 233 // Return the high bit of the register code as a 0 or 1. Used often 234 // when constructing the REX prefix byte. 235 int high_bit() const { return reg_code >> 3; } 236 // Return the 3 low bits of the register code. Used when encoding registers 237 // in modR/M, SIB, and opcode bytes. 238 int low_bits() const { return reg_code & 0x7; } 239 240 // Unfortunately we can't make this private in a struct when initializing 241 // by assignment. 242 int reg_code; 243 }; 244 245 typedef XMMRegister FloatRegister; 246 247 typedef XMMRegister DoubleRegister; 248 249 typedef XMMRegister Simd128Register; 250 251 #define DECLARE_REGISTER(R) \ 252 const DoubleRegister R = {DoubleRegister::kCode_##R}; 253 DOUBLE_REGISTERS(DECLARE_REGISTER) 254 #undef DECLARE_REGISTER 255 const DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg}; 256 257 enum Condition { 258 // any value < 0 is considered no_condition 259 no_condition = -1, 260 261 overflow = 0, 262 no_overflow = 1, 263 below = 2, 264 above_equal = 3, 265 equal = 4, 266 not_equal = 5, 267 below_equal = 6, 268 above = 7, 269 negative = 8, 270 positive = 9, 271 parity_even = 10, 272 parity_odd = 11, 273 less = 12, 274 greater_equal = 13, 275 less_equal = 14, 276 greater = 15, 277 278 // Fake conditions that are handled by the 279 // opcodes using them. 280 always = 16, 281 never = 17, 282 // aliases 283 carry = below, 284 not_carry = above_equal, 285 zero = equal, 286 not_zero = not_equal, 287 sign = negative, 288 not_sign = positive, 289 last_condition = greater 290 }; 291 292 293 // Returns the equivalent of !cc. 294 // Negation of the default no_condition (-1) results in a non-default 295 // no_condition value (-2). As long as tests for no_condition check 296 // for condition < 0, this will work as expected. 
297 inline Condition NegateCondition(Condition cc) { 298 return static_cast<Condition>(cc ^ 1); 299 } 300 301 302 // Commute a condition such that {a cond b == b cond' a}. 303 inline Condition CommuteCondition(Condition cc) { 304 switch (cc) { 305 case below: 306 return above; 307 case above: 308 return below; 309 case above_equal: 310 return below_equal; 311 case below_equal: 312 return above_equal; 313 case less: 314 return greater; 315 case greater: 316 return less; 317 case greater_equal: 318 return less_equal; 319 case less_equal: 320 return greater_equal; 321 default: 322 return cc; 323 } 324 } 325 326 327 enum RoundingMode { 328 kRoundToNearest = 0x0, 329 kRoundDown = 0x1, 330 kRoundUp = 0x2, 331 kRoundToZero = 0x3 332 }; 333 334 335 // ----------------------------------------------------------------------------- 336 // Machine instruction Immediates 337 338 class Immediate BASE_EMBEDDED { 339 public: 340 explicit Immediate(int32_t value) : value_(value) {} 341 explicit Immediate(int32_t value, RelocInfo::Mode rmode) 342 : value_(value), rmode_(rmode) {} 343 explicit Immediate(Smi* value) { 344 DCHECK(SmiValuesAre31Bits()); // Only available for 31-bit SMI. 345 value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value)); 346 } 347 348 private: 349 int32_t value_; 350 RelocInfo::Mode rmode_ = RelocInfo::NONE32; 351 352 friend class Assembler; 353 }; 354 355 356 // ----------------------------------------------------------------------------- 357 // Machine instruction Operands 358 359 enum ScaleFactor { 360 times_1 = 0, 361 times_2 = 1, 362 times_4 = 2, 363 times_8 = 3, 364 times_int_size = times_4, 365 times_pointer_size = (kPointerSize == 8) ? 
times_8 : times_4 366 }; 367 368 369 class Operand BASE_EMBEDDED { 370 public: 371 // [base + disp/r] 372 Operand(Register base, int32_t disp); 373 374 // [base + index*scale + disp/r] 375 Operand(Register base, 376 Register index, 377 ScaleFactor scale, 378 int32_t disp); 379 380 // [index*scale + disp/r] 381 Operand(Register index, 382 ScaleFactor scale, 383 int32_t disp); 384 385 // Offset from existing memory operand. 386 // Offset is added to existing displacement as 32-bit signed values and 387 // this must not overflow. 388 Operand(const Operand& base, int32_t offset); 389 390 // [rip + disp/r] 391 explicit Operand(Label* label); 392 393 // Checks whether either base or index register is the given register. 394 // Does not check the "reg" part of the Operand. 395 bool AddressUsesRegister(Register reg) const; 396 397 // Queries related to the size of the generated instruction. 398 // Whether the generated instruction will have a REX prefix. 399 bool requires_rex() const { return rex_ != 0; } 400 // Size of the ModR/M, SIB and displacement parts of the generated 401 // instruction. 402 int operand_size() const { return len_; } 403 404 private: 405 byte rex_; 406 byte buf_[9]; 407 // The number of bytes of buf_ in use. 408 byte len_; 409 410 // Set the ModR/M byte without an encoded 'reg' register. The 411 // register is encoded later as part of the emit_operand operation. 412 // set_modrm can be called before or after set_sib and set_disp*. 413 inline void set_modrm(int mod, Register rm); 414 415 // Set the SIB byte if one is needed. Sets the length to 2 rather than 1. 416 inline void set_sib(ScaleFactor scale, Register index, Register base); 417 418 // Adds operand displacement fields (offsets added to the memory address). 419 // Needs to be called after set_sib, not before it. 420 inline void set_disp8(int disp); 421 inline void set_disp32(int disp); 422 inline void set_disp64(int64_t disp); // for labels. 
423 424 friend class Assembler; 425 }; 426 427 #define ASSEMBLER_INSTRUCTION_LIST(V) \ 428 V(add) \ 429 V(and) \ 430 V(cmp) \ 431 V(cmpxchg) \ 432 V(dec) \ 433 V(idiv) \ 434 V(div) \ 435 V(imul) \ 436 V(inc) \ 437 V(lea) \ 438 V(mov) \ 439 V(movzxb) \ 440 V(movzxw) \ 441 V(neg) \ 442 V(not) \ 443 V(or) \ 444 V(repmovs) \ 445 V(sbb) \ 446 V(sub) \ 447 V(test) \ 448 V(xchg) \ 449 V(xor) 450 451 // Shift instructions on operands/registers with kPointerSize, kInt32Size and 452 // kInt64Size. 453 #define SHIFT_INSTRUCTION_LIST(V) \ 454 V(rol, 0x0) \ 455 V(ror, 0x1) \ 456 V(rcl, 0x2) \ 457 V(rcr, 0x3) \ 458 V(shl, 0x4) \ 459 V(shr, 0x5) \ 460 V(sar, 0x7) \ 461 462 463 class Assembler : public AssemblerBase { 464 private: 465 // We check before assembling an instruction that there is sufficient 466 // space to write an instruction and its relocation information. 467 // The relocation writer's position must be kGap bytes above the end of 468 // the generated instructions. This leaves enough space for the 469 // longest possible x64 instruction, 15 bytes, and the longest possible 470 // relocation information encoding, RelocInfoWriter::kMaxLength == 16. 471 // (There is a 15 byte limit on x64 instruction length that rules out some 472 // otherwise valid instructions.) 473 // This allows for a single, fast space check per instruction. 474 static const int kGap = 32; 475 476 public: 477 // Create an assembler. Instructions and relocation information are emitted 478 // into a buffer, with the instructions starting from the beginning and the 479 // relocation information starting from the end of the buffer. See CodeDesc 480 // for a detailed comment on the layout (globals.h). 481 // 482 // If the provided buffer is NULL, the assembler allocates and grows its own 483 // buffer, and buffer_size determines the initial buffer size. The buffer is 484 // owned by the assembler and deallocated upon destruction of the assembler. 
//
// If the provided buffer is not NULL, the assembler uses the provided buffer
// for code generation and assumes its size to be buffer_size. If the buffer
// is too small, a fatal error occurs. No deallocation of the buffer is done
// upon destruction of the assembler.
Assembler(Isolate* isolate, void* buffer, int buffer_size);
virtual ~Assembler() { }

// GetCode emits any pending (non-emitted) code and fills the descriptor
// desc. GetCode() is idempotent; it returns the same result if no other
// Assembler functions are invoked in between GetCode() calls.
void GetCode(CodeDesc* desc);

// Read/Modify the code target in the relative branch/call instruction at pc.
// On the x64 architecture, we use relative jumps with a 32-bit displacement
// to jump to other Code objects in the Code space in the heap.
// Jumps to C functions are done indirectly through a 64-bit register holding
// the absolute address of the target.
// These functions convert between absolute Addresses of Code objects and
// the relative displacements stored in the code.
//
// Each accessor comes in two flavours: one taking the constant pool address
// and one taking the Code object. The setters additionally take the isolate
// and an instruction-cache flush mode, which defaults to
// FLUSH_ICACHE_IF_NEEDED.
static inline Address target_address_at(Address pc, Address constant_pool);
static inline void set_target_address_at(
    Isolate* isolate, Address pc, Address constant_pool, Address target,
    ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
static inline Address target_address_at(Address pc, Code* code);
static inline void set_target_address_at(
    Isolate* isolate, Address pc, Code* code, Address target,
    ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);

// Return the code target address at a call site from the return address
// of that call in the instruction stream.
static inline Address target_address_from_return_address(Address pc);

// This sets the branch destination (which is in the instruction on x64).
// This is for calls and branches within generated code.
520 inline static void deserialization_set_special_target_at( 521 Isolate* isolate, Address instruction_payload, Code* code, 522 Address target); 523 524 // This sets the internal reference at the pc. 525 inline static void deserialization_set_target_internal_reference_at( 526 Isolate* isolate, Address pc, Address target, 527 RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); 528 529 static inline RelocInfo::Mode RelocInfoNone() { 530 if (kPointerSize == kInt64Size) { 531 return RelocInfo::NONE64; 532 } else { 533 DCHECK(kPointerSize == kInt32Size); 534 return RelocInfo::NONE32; 535 } 536 } 537 538 inline Handle<Object> code_target_object_handle_at(Address pc); 539 inline Address runtime_entry_at(Address pc); 540 // Number of bytes taken up by the branch target in the code. 541 static const int kSpecialTargetSize = 4; // Use 32-bit displacement. 542 // Distance between the address of the code target in the call instruction 543 // and the return address pushed on the stack. 544 static const int kCallTargetAddressOffset = 4; // Use 32-bit displacement. 545 // The length of call(kScratchRegister). 546 static const int kCallScratchRegisterInstructionLength = 3; 547 // The length of call(Immediate32). 548 static const int kShortCallInstructionLength = 5; 549 // The length of movq(kScratchRegister, address). 550 static const int kMoveAddressIntoScratchRegisterInstructionLength = 551 2 + kPointerSize; 552 // The length of movq(kScratchRegister, address) and call(kScratchRegister). 553 static const int kCallSequenceLength = 554 kMoveAddressIntoScratchRegisterInstructionLength + 555 kCallScratchRegisterInstructionLength; 556 557 // The debug break slot must be able to contain an indirect call sequence. 558 static const int kDebugBreakSlotLength = kCallSequenceLength; 559 // Distance between start of patched debug break slot and the emitted address 560 // to jump to. 
561 static const int kPatchDebugBreakSlotAddressOffset = 562 kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize; 563 564 // One byte opcode for test eax,0xXXXXXXXX. 565 static const byte kTestEaxByte = 0xA9; 566 // One byte opcode for test al, 0xXX. 567 static const byte kTestAlByte = 0xA8; 568 // One byte opcode for nop. 569 static const byte kNopByte = 0x90; 570 571 // One byte prefix for a short conditional jump. 572 static const byte kJccShortPrefix = 0x70; 573 static const byte kJncShortOpcode = kJccShortPrefix | not_carry; 574 static const byte kJcShortOpcode = kJccShortPrefix | carry; 575 static const byte kJnzShortOpcode = kJccShortPrefix | not_zero; 576 static const byte kJzShortOpcode = kJccShortPrefix | zero; 577 578 // VEX prefix encodings. 579 enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 }; 580 enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 }; 581 enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 }; 582 enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 }; 583 584 // --------------------------------------------------------------------------- 585 // Code generation 586 // 587 // Function names correspond one-to-one to x64 instruction mnemonics. 588 // Unless specified otherwise, instructions operate on 64-bit operands. 589 // 590 // If we need versions of an assembly instruction that operate on different 591 // width arguments, we add a single-letter suffix specifying the width. 592 // This is done for the following instructions: mov, cmp, inc, dec, 593 // add, sub, and test. 594 // There are no versions of these instructions without the suffix. 595 // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'. 596 // - Instructions on 16-bit (word) operands/registers have a trailing 'w'. 597 // - Instructions on 32-bit (doubleword) operands/registers use 'l'. 598 // - Instructions on 64-bit (quadword) operands/registers use 'q'. 
// - Instructions on operands/registers with pointer size use 'p'.

STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);

// For every name in ASSEMBLER_INSTRUCTION_LIST this generates the member
// templates <name>p, <name>l and <name>q, each taking one, two or three
// arguments. They forward the arguments unchanged to emit_<name>, appending
// the operand size (kPointerSize, kInt32Size or kInt64Size respectively).
#define DECLARE_INSTRUCTION(instruction)                \
  template<class P1>                                    \
  void instruction##p(P1 p1) {                          \
    emit_##instruction(p1, kPointerSize);               \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##l(P1 p1) {                          \
    emit_##instruction(p1, kInt32Size);                 \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##q(P1 p1) {                          \
    emit_##instruction(p1, kInt64Size);                 \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##p(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kPointerSize);           \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##l(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt32Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##q(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt64Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##p(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kPointerSize);       \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##l(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt32Size);         \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##q(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt64Size);         \
  }
ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION

// Insert the smallest number of nop instructions
// possible to align the pc offset to a multiple
// of m, where m must be a power of 2.
void Align(int m);
// Insert the smallest number of zero bytes possible to align the pc offset
// to a multiple of m. m must be a power of 2 (>= 2).
void DataAlign(int m);
// Takes a byte count, which defaults to 1.
void Nop(int bytes = 1);
// Aligns code to something that's optimal for a jump target for the platform.
void CodeTargetAlign();

// Stack
void pushfq();
void popfq();

void pushq(Immediate value);
// Push a 32 bit integer, and guarantee that it is actually pushed as a
// 32 bit value, the normal push will optimize the 8 bit case.
void pushq_imm32(int32_t imm32);
void pushq(Register src);
void pushq(const Operand& src);

void popq(Register dst);
void popq(const Operand& dst);

void enter(Immediate size);
void leave();

// Moves
// Byte moves: memory to register, immediate to register, register to
// memory, and immediate to memory.
void movb(Register dst, const Operand& src);
void movb(Register dst, Immediate imm);
void movb(const Operand& dst, Register src);
void movb(const Operand& dst, Immediate imm);

// Move the low 16 bits of a 64-bit register value to a 16-bit
// memory location.
// NOTE(review): the sentence above describes only the register-to-memory
// overload; the first overload below loads from memory into a register and
// the third stores an immediate.
void movw(Register dst, const Operand& src);
void movw(const Operand& dst, Register src);
void movw(const Operand& dst, Immediate imm);

// Move the offset of the label location relative to the current
// position (after the move) to the destination.
void movl(const Operand& dst, Label* src);

// Loads a pointer into a register with a relocation mode.
void movp(Register dst, void* ptr, RelocInfo::Mode rmode);

// Loads a 64-bit immediate into a register. Signed and unsigned overloads
// are provided; the relocation mode defaults to RelocInfo::NONE64.
void movq(Register dst, int64_t value,
          RelocInfo::Mode rmode = RelocInfo::NONE64);
void movq(Register dst, uint64_t value,
          RelocInfo::Mode rmode = RelocInfo::NONE64);

// movsx variants, each with a register and a memory source. Following the
// suffix convention used in this class, the two trailing letters give the
// source width and then the destination width (b = 8, w = 16, l = 32,
// q = 64 bits).
void movsxbl(Register dst, Register src);
void movsxbl(Register dst, const Operand& src);
void movsxbq(Register dst, Register src);
void movsxbq(Register dst, const Operand& src);
void movsxwl(Register dst, Register src);
void movsxwl(Register dst, const Operand& src);
void movsxwq(Register dst, Register src);
void movsxwq(Register dst, const Operand& src);
void movsxlq(Register dst, Register src);
void movsxlq(Register dst, const Operand& src);

// Repeated moves.
716 717 void repmovsb(); 718 void repmovsw(); 719 void repmovsp() { emit_repmovs(kPointerSize); } 720 void repmovsl() { emit_repmovs(kInt32Size); } 721 void repmovsq() { emit_repmovs(kInt64Size); } 722 723 // Instruction to load from an immediate 64-bit pointer into RAX. 724 void load_rax(void* ptr, RelocInfo::Mode rmode); 725 void load_rax(ExternalReference ext); 726 727 // Conditional moves. 728 void cmovq(Condition cc, Register dst, Register src); 729 void cmovq(Condition cc, Register dst, const Operand& src); 730 void cmovl(Condition cc, Register dst, Register src); 731 void cmovl(Condition cc, Register dst, const Operand& src); 732 733 void cmpb(Register dst, Immediate src) { 734 immediate_arithmetic_op_8(0x7, dst, src); 735 } 736 737 void cmpb_al(Immediate src); 738 739 void cmpb(Register dst, Register src) { 740 arithmetic_op_8(0x3A, dst, src); 741 } 742 743 void cmpb(Register dst, const Operand& src) { 744 arithmetic_op_8(0x3A, dst, src); 745 } 746 747 void cmpb(const Operand& dst, Register src) { 748 arithmetic_op_8(0x38, src, dst); 749 } 750 751 void cmpb(const Operand& dst, Immediate src) { 752 immediate_arithmetic_op_8(0x7, dst, src); 753 } 754 755 void cmpw(const Operand& dst, Immediate src) { 756 immediate_arithmetic_op_16(0x7, dst, src); 757 } 758 759 void cmpw(Register dst, Immediate src) { 760 immediate_arithmetic_op_16(0x7, dst, src); 761 } 762 763 void cmpw(Register dst, const Operand& src) { 764 arithmetic_op_16(0x3B, dst, src); 765 } 766 767 void cmpw(Register dst, Register src) { 768 arithmetic_op_16(0x3B, dst, src); 769 } 770 771 void cmpw(const Operand& dst, Register src) { 772 arithmetic_op_16(0x39, src, dst); 773 } 774 775 void testb(Register reg, const Operand& op) { testb(op, reg); } 776 777 void testw(Register reg, const Operand& op) { testw(op, reg); } 778 779 void andb(Register dst, Immediate src) { 780 immediate_arithmetic_op_8(0x4, dst, src); 781 } 782 783 void decb(Register dst); 784 void decb(const Operand& dst); 785 786 // Lock 
prefix. 787 void lock(); 788 789 void xchgb(Register reg, const Operand& op); 790 void xchgw(Register reg, const Operand& op); 791 792 void cmpxchgb(const Operand& dst, Register src); 793 void cmpxchgw(const Operand& dst, Register src); 794 795 // Sign-extends rax into rdx:rax. 796 void cqo(); 797 // Sign-extends eax into edx:eax. 798 void cdq(); 799 800 // Multiply eax by src, put the result in edx:eax. 801 void mull(Register src); 802 void mull(const Operand& src); 803 // Multiply rax by src, put the result in rdx:rax. 804 void mulq(Register src); 805 806 #define DECLARE_SHIFT_INSTRUCTION(instruction, subcode) \ 807 void instruction##p(Register dst, Immediate imm8) { \ 808 shift(dst, imm8, subcode, kPointerSize); \ 809 } \ 810 \ 811 void instruction##l(Register dst, Immediate imm8) { \ 812 shift(dst, imm8, subcode, kInt32Size); \ 813 } \ 814 \ 815 void instruction##q(Register dst, Immediate imm8) { \ 816 shift(dst, imm8, subcode, kInt64Size); \ 817 } \ 818 \ 819 void instruction##p(Operand dst, Immediate imm8) { \ 820 shift(dst, imm8, subcode, kPointerSize); \ 821 } \ 822 \ 823 void instruction##l(Operand dst, Immediate imm8) { \ 824 shift(dst, imm8, subcode, kInt32Size); \ 825 } \ 826 \ 827 void instruction##q(Operand dst, Immediate imm8) { \ 828 shift(dst, imm8, subcode, kInt64Size); \ 829 } \ 830 \ 831 void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \ 832 \ 833 void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \ 834 \ 835 void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \ 836 \ 837 void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); } \ 838 \ 839 void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); } \ 840 \ 841 void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); } 842 SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION) 843 #undef DECLARE_SHIFT_INSTRUCTION 844 845 // Shifts dst:src left by cl bits, affecting only dst. 
void shld(Register dst, Register src);

// Shifts src:dst right by cl bits, affecting only dst.
void shrd(Register dst, Register src);

// Counterparts of load_rax: take a raw pointer plus relocation mode, or an
// ExternalReference.
void store_rax(void* dst, RelocInfo::Mode mode);
void store_rax(ExternalReference ref);

// 8-bit subtract of an immediate; forwards subcode 0x5 to
// immediate_arithmetic_op_8.
void subb(Register dst, Immediate src) {
  immediate_arithmetic_op_8(0x5, dst, src);
}

// 8-bit tests: register/register, register/immediate, memory/immediate and
// memory/register.
void testb(Register dst, Register src);
void testb(Register reg, Immediate mask);
void testb(const Operand& op, Immediate mask);
void testb(const Operand& op, Register reg);

// 16-bit tests, same four forms.
void testw(Register dst, Register src);
void testw(Register reg, Immediate mask);
void testw(const Operand& op, Immediate mask);
void testw(const Operand& op, Register reg);

// Bit operations.
void bt(const Operand& dst, Register src);
void bts(const Operand& dst, Register src);
void bsrq(Register dst, Register src);
void bsrq(Register dst, const Operand& src);
void bsrl(Register dst, Register src);
void bsrl(Register dst, const Operand& src);
void bsfq(Register dst, Register src);
void bsfq(Register dst, const Operand& src);
void bsfl(Register dst, Register src);
void bsfl(Register dst, const Operand& src);

// Miscellaneous
void clc();
void cld();
void cpuid();
void hlt();
void int3();
void nop();
void ret(int imm16);
void ud2();
void setcc(Condition cc, Register reg);

// Label operations & relative jumps (PPUM Appendix D)
//
// Takes a branch opcode (cc) and a label (L) and generates
// either a backward branch or a forward branch and links it
// to the label fixup chain. Usage:
//
// Label L;    // unbound label
// j(cc, &L);  // forward branch to unbound label
// bind(&L);   // bind label to the current pc
// j(cc, &L);  // backward branch to bound label
// bind(&L);   // illegal: a label may be bound only once
//
// Note: The same Label can be used for forward and backward branches
// but it may be bound only once.

void bind(Label* L);  // binds an unbound label L to the current code position

// Calls
// Call near relative 32-bit displacement, relative to next instruction.
void call(Label* L);
void call(Address entry, RelocInfo::Mode rmode);
// The relocation mode defaults to CODE_TARGET and the feedback id to
// TypeFeedbackId::None().
void call(Handle<Code> target,
          RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
          TypeFeedbackId ast_id = TypeFeedbackId::None());

// Calls directly to the given address using a relative offset.
// Should only ever be used in Code objects for calls within the
// same Code object. Should not be used when generating new code (use labels),
// but only when patching existing code.
void call(Address target);

// Call near absolute indirect, address in register
void call(Register adr);

// Jumps
// Jump short or near relative.
// Use a 32-bit signed displacement.
// Unconditional jump to L
// The distance hint defaults to Label::kFar.
void jmp(Label* L, Label::Distance distance = Label::kFar);
void jmp(Address entry, RelocInfo::Mode rmode);
void jmp(Handle<Code> target, RelocInfo::Mode rmode);

// Jump near absolute indirect (r64)
void jmp(Register adr);
void jmp(const Operand& src);

// Conditional jumps
// Same three target kinds as jmp, with the condition as first argument.
void j(Condition cc,
       Label* L,
       Label::Distance distance = Label::kFar);
void j(Condition cc, Address entry, RelocInfo::Mode rmode);
void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);

// Floating-point operations
// NOTE(review): the int parameters below presumably select an x87 stack
// slot st(i) -- confirm against the definitions in the .cc file.
void fld(int i);

void fld1();
void fldz();
void fldpi();
void fldln2();

void fld_s(const Operand& adr);
void fld_d(const Operand& adr);

void fstp_s(const Operand& adr);
void fstp_d(const Operand& adr);
void fstp(int index);

void fild_s(const Operand& adr);
void fild_d(const Operand& adr);

void fist_s(const Operand& adr);

void fistp_s(const Operand& adr);
void fistp_d(const Operand& adr);

void fisttp_s(const Operand& adr);
void fisttp_d(const Operand& adr);

void fabs();
void fchs();

void fadd(int i);
void fsub(int i);
void fmul(int i);
void fdiv(int i);

void fisub_s(const Operand& adr);

// The popping arithmetic forms default their argument to 1.
void faddp(int i = 1);
void fsubp(int i = 1);
void fsubrp(int i = 1);
void fmulp(int i = 1);
void fdivp(int i = 1);
void fprem();
void fprem1();

void fxch(int i = 1);
void fincstp();
void ffree(int i = 0);

void ftst();
void fucomp(int i);
void fucompp();
void fucomi(int i);
void fucomip();

void fcompp();
void fnstsw_ax();
void fwait();
void fnclex();

void fsin();
void fcos();
void fptan();
void fyl2x();
void f2xm1();
void fscale();
void fninit();

void frndint();

void sahf();

// SSE instructions
// Each instruction is declared with a register source and, except for
// movaps, a memory source as well.
void addss(XMMRegister dst, XMMRegister src);
void addss(XMMRegister dst, const Operand& src);
void subss(XMMRegister dst, XMMRegister src);
void subss(XMMRegister dst, const Operand& src);
void mulss(XMMRegister dst, XMMRegister src);
void mulss(XMMRegister dst, const Operand& src);
void divss(XMMRegister dst, XMMRegister src);
void divss(XMMRegister dst, const Operand& src);

void maxss(XMMRegister dst, XMMRegister src);
void maxss(XMMRegister dst, const Operand& src);
void minss(XMMRegister dst, XMMRegister src);
void minss(XMMRegister dst, const Operand& src);

void sqrtss(XMMRegister dst, XMMRegister src);
void sqrtss(XMMRegister dst, const Operand& src);

void ucomiss(XMMRegister dst, XMMRegister src);
void ucomiss(XMMRegister dst, const Operand& src);
void movaps(XMMRegister dst, XMMRegister src);

// Don't use this unless it's important to keep the
// top half of the destination register unchanged.
// Use movaps when moving float values and movd for integer
// values in xmm registers.
1041 void movss(XMMRegister dst, XMMRegister src); 1042 1043 void movss(XMMRegister dst, const Operand& src); 1044 void movss(const Operand& dst, XMMRegister src); 1045 void shufps(XMMRegister dst, XMMRegister src, byte imm8); 1046 1047 void cvttss2si(Register dst, const Operand& src); 1048 void cvttss2si(Register dst, XMMRegister src); 1049 void cvtlsi2ss(XMMRegister dst, const Operand& src); 1050 void cvtlsi2ss(XMMRegister dst, Register src); 1051 1052 void andps(XMMRegister dst, XMMRegister src); 1053 void andps(XMMRegister dst, const Operand& src); 1054 void orps(XMMRegister dst, XMMRegister src); 1055 void orps(XMMRegister dst, const Operand& src); 1056 void xorps(XMMRegister dst, XMMRegister src); 1057 void xorps(XMMRegister dst, const Operand& src); 1058 1059 void addps(XMMRegister dst, XMMRegister src); 1060 void addps(XMMRegister dst, const Operand& src); 1061 void subps(XMMRegister dst, XMMRegister src); 1062 void subps(XMMRegister dst, const Operand& src); 1063 void mulps(XMMRegister dst, XMMRegister src); 1064 void mulps(XMMRegister dst, const Operand& src); 1065 void divps(XMMRegister dst, XMMRegister src); 1066 void divps(XMMRegister dst, const Operand& src); 1067 1068 void movmskps(Register dst, XMMRegister src); 1069 1070 void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2, 1071 SIMDPrefix pp, LeadingOpcode m, VexW w); 1072 void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2, 1073 SIMDPrefix pp, LeadingOpcode m, VexW w); 1074 1075 // SSE2 instructions 1076 void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape, 1077 byte opcode); 1078 void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape, 1079 byte opcode); 1080 #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \ 1081 void instruction(XMMRegister dst, XMMRegister src) { \ 1082 sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \ 1083 } \ 1084 void instruction(XMMRegister dst, const 
Operand& src) { \ 1085 sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \ 1086 } 1087 1088 SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION) 1089 #undef DECLARE_SSE2_INSTRUCTION 1090 1091 #define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \ 1092 void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ 1093 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \ 1094 } \ 1095 void v##instruction(XMMRegister dst, XMMRegister src1, \ 1096 const Operand& src2) { \ 1097 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \ 1098 } 1099 1100 SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION) 1101 #undef DECLARE_SSE2_AVX_INSTRUCTION 1102 1103 // SSE3 1104 void lddqu(XMMRegister dst, const Operand& src); 1105 1106 // SSSE3 1107 void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1, 1108 byte escape2, byte opcode); 1109 void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix, 1110 byte escape1, byte escape2, byte opcode); 1111 1112 #define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \ 1113 opcode) \ 1114 void instruction(XMMRegister dst, XMMRegister src) { \ 1115 ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \ 1116 } \ 1117 void instruction(XMMRegister dst, const Operand& src) { \ 1118 ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \ 1119 } 1120 1121 SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION) 1122 #undef DECLARE_SSSE3_INSTRUCTION 1123 1124 // SSE4 1125 void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1, 1126 byte escape2, byte opcode); 1127 void sse4_instr(XMMRegister dst, const Operand& src, byte prefix, 1128 byte escape1, byte escape2, byte opcode); 1129 #define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \ 1130 opcode) \ 1131 void instruction(XMMRegister dst, XMMRegister src) { \ 1132 sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 
0x##opcode); \ 1133 } \ 1134 void instruction(XMMRegister dst, const Operand& src) { \ 1135 sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \ 1136 } 1137 1138 SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION) 1139 #undef DECLARE_SSE4_INSTRUCTION 1140 1141 #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \ 1142 opcode) \ 1143 void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ 1144 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \ 1145 } \ 1146 void v##instruction(XMMRegister dst, XMMRegister src1, \ 1147 const Operand& src2) { \ 1148 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \ 1149 } 1150 1151 SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION) 1152 SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION) 1153 #undef DECLARE_SSE34_AVX_INSTRUCTION 1154 1155 void movd(XMMRegister dst, Register src); 1156 void movd(XMMRegister dst, const Operand& src); 1157 void movd(Register dst, XMMRegister src); 1158 void movq(XMMRegister dst, Register src); 1159 void movq(Register dst, XMMRegister src); 1160 void movq(XMMRegister dst, XMMRegister src); 1161 1162 // Don't use this unless it's important to keep the 1163 // top half of the destination register unchanged. 1164 // Use movapd when moving double values and movq for integer 1165 // values in xmm registers. 
1166 void movsd(XMMRegister dst, XMMRegister src); 1167 1168 void movsd(const Operand& dst, XMMRegister src); 1169 void movsd(XMMRegister dst, const Operand& src); 1170 1171 void movdqa(const Operand& dst, XMMRegister src); 1172 void movdqa(XMMRegister dst, const Operand& src); 1173 1174 void movdqu(const Operand& dst, XMMRegister src); 1175 void movdqu(XMMRegister dst, const Operand& src); 1176 1177 void movapd(XMMRegister dst, XMMRegister src); 1178 void movupd(XMMRegister dst, const Operand& src); 1179 void movupd(const Operand& dst, XMMRegister src); 1180 1181 void psllq(XMMRegister reg, byte imm8); 1182 void psrlq(XMMRegister reg, byte imm8); 1183 void psllw(XMMRegister reg, byte imm8); 1184 void pslld(XMMRegister reg, byte imm8); 1185 void psrlw(XMMRegister reg, byte imm8); 1186 void psrld(XMMRegister reg, byte imm8); 1187 void psraw(XMMRegister reg, byte imm8); 1188 void psrad(XMMRegister reg, byte imm8); 1189 1190 void cvttsd2si(Register dst, const Operand& src); 1191 void cvttsd2si(Register dst, XMMRegister src); 1192 void cvttss2siq(Register dst, XMMRegister src); 1193 void cvttss2siq(Register dst, const Operand& src); 1194 void cvttsd2siq(Register dst, XMMRegister src); 1195 void cvttsd2siq(Register dst, const Operand& src); 1196 1197 void cvtlsi2sd(XMMRegister dst, const Operand& src); 1198 void cvtlsi2sd(XMMRegister dst, Register src); 1199 1200 void cvtqsi2ss(XMMRegister dst, const Operand& src); 1201 void cvtqsi2ss(XMMRegister dst, Register src); 1202 1203 void cvtqsi2sd(XMMRegister dst, const Operand& src); 1204 void cvtqsi2sd(XMMRegister dst, Register src); 1205 1206 1207 void cvtss2sd(XMMRegister dst, XMMRegister src); 1208 void cvtss2sd(XMMRegister dst, const Operand& src); 1209 void cvtsd2ss(XMMRegister dst, XMMRegister src); 1210 void cvtsd2ss(XMMRegister dst, const Operand& src); 1211 1212 void cvtsd2si(Register dst, XMMRegister src); 1213 void cvtsd2siq(Register dst, XMMRegister src); 1214 1215 void addsd(XMMRegister dst, XMMRegister src); 
1216 void addsd(XMMRegister dst, const Operand& src); 1217 void subsd(XMMRegister dst, XMMRegister src); 1218 void subsd(XMMRegister dst, const Operand& src); 1219 void mulsd(XMMRegister dst, XMMRegister src); 1220 void mulsd(XMMRegister dst, const Operand& src); 1221 void divsd(XMMRegister dst, XMMRegister src); 1222 void divsd(XMMRegister dst, const Operand& src); 1223 1224 void maxsd(XMMRegister dst, XMMRegister src); 1225 void maxsd(XMMRegister dst, const Operand& src); 1226 void minsd(XMMRegister dst, XMMRegister src); 1227 void minsd(XMMRegister dst, const Operand& src); 1228 1229 void andpd(XMMRegister dst, XMMRegister src); 1230 void andpd(XMMRegister dst, const Operand& src); 1231 void orpd(XMMRegister dst, XMMRegister src); 1232 void orpd(XMMRegister dst, const Operand& src); 1233 void xorpd(XMMRegister dst, XMMRegister src); 1234 void xorpd(XMMRegister dst, const Operand& src); 1235 void sqrtsd(XMMRegister dst, XMMRegister src); 1236 void sqrtsd(XMMRegister dst, const Operand& src); 1237 1238 void ucomisd(XMMRegister dst, XMMRegister src); 1239 void ucomisd(XMMRegister dst, const Operand& src); 1240 void cmpltsd(XMMRegister dst, XMMRegister src); 1241 1242 void movmskpd(Register dst, XMMRegister src); 1243 1244 void punpckldq(XMMRegister dst, XMMRegister src); 1245 void punpckldq(XMMRegister dst, const Operand& src); 1246 void punpckhdq(XMMRegister dst, XMMRegister src); 1247 1248 // SSE 4.1 instruction 1249 void insertps(XMMRegister dst, XMMRegister src, byte imm8); 1250 void extractps(Register dst, XMMRegister src, byte imm8); 1251 void pextrb(Register dst, XMMRegister src, int8_t imm8); 1252 void pextrb(const Operand& dst, XMMRegister src, int8_t imm8); 1253 void pextrw(Register dst, XMMRegister src, int8_t imm8); 1254 void pextrw(const Operand& dst, XMMRegister src, int8_t imm8); 1255 void pextrd(Register dst, XMMRegister src, int8_t imm8); 1256 void pextrd(const Operand& dst, XMMRegister src, int8_t imm8); 1257 void pinsrb(XMMRegister dst, Register 
src, int8_t imm8); 1258 void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8); 1259 void pinsrw(XMMRegister dst, Register src, int8_t imm8); 1260 void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8); 1261 void pinsrd(XMMRegister dst, Register src, int8_t imm8); 1262 void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8); 1263 1264 void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode); 1265 void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode); 1266 1267 void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp); 1268 void cmpps(XMMRegister dst, const Operand& src, int8_t cmp); 1269 void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp); 1270 void cmppd(XMMRegister dst, const Operand& src, int8_t cmp); 1271 1272 #define SSE_CMP_P(instr, imm8) \ 1273 void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \ 1274 void instr##ps(XMMRegister dst, const Operand& src) { \ 1275 cmpps(dst, src, imm8); \ 1276 } \ 1277 void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \ 1278 void instr##pd(XMMRegister dst, const Operand& src) { cmppd(dst, src, imm8); } 1279 1280 SSE_CMP_P(cmpeq, 0x0); 1281 SSE_CMP_P(cmplt, 0x1); 1282 SSE_CMP_P(cmple, 0x2); 1283 SSE_CMP_P(cmpneq, 0x4); 1284 SSE_CMP_P(cmpnlt, 0x5); 1285 SSE_CMP_P(cmpnle, 0x6); 1286 1287 #undef SSE_CMP_P 1288 1289 void minps(XMMRegister dst, XMMRegister src); 1290 void minps(XMMRegister dst, const Operand& src); 1291 void maxps(XMMRegister dst, XMMRegister src); 1292 void maxps(XMMRegister dst, const Operand& src); 1293 void rcpps(XMMRegister dst, XMMRegister src); 1294 void rcpps(XMMRegister dst, const Operand& src); 1295 void rsqrtps(XMMRegister dst, XMMRegister src); 1296 void rsqrtps(XMMRegister dst, const Operand& src); 1297 void sqrtps(XMMRegister dst, XMMRegister src); 1298 void sqrtps(XMMRegister dst, const Operand& src); 1299 void movups(XMMRegister dst, XMMRegister src); 1300 void movups(XMMRegister dst, const Operand& src); 
1301 void movups(const Operand& dst, XMMRegister src); 1302 void psrldq(XMMRegister dst, uint8_t shift); 1303 void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle); 1304 void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle); 1305 void cvtdq2ps(XMMRegister dst, XMMRegister src); 1306 void cvtdq2ps(XMMRegister dst, const Operand& src); 1307 1308 // AVX instruction 1309 void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1310 vfmasd(0x99, dst, src1, src2); 1311 } 1312 void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1313 vfmasd(0xa9, dst, src1, src2); 1314 } 1315 void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1316 vfmasd(0xb9, dst, src1, src2); 1317 } 1318 void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1319 vfmasd(0x99, dst, src1, src2); 1320 } 1321 void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1322 vfmasd(0xa9, dst, src1, src2); 1323 } 1324 void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1325 vfmasd(0xb9, dst, src1, src2); 1326 } 1327 void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1328 vfmasd(0x9b, dst, src1, src2); 1329 } 1330 void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1331 vfmasd(0xab, dst, src1, src2); 1332 } 1333 void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1334 vfmasd(0xbb, dst, src1, src2); 1335 } 1336 void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1337 vfmasd(0x9b, dst, src1, src2); 1338 } 1339 void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1340 vfmasd(0xab, dst, src1, src2); 1341 } 1342 void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1343 vfmasd(0xbb, dst, src1, src2); 1344 } 1345 void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1346 vfmasd(0x9d, dst, src1, src2); 1347 } 1348 void vfnmadd213sd(XMMRegister 
dst, XMMRegister src1, XMMRegister src2) { 1349 vfmasd(0xad, dst, src1, src2); 1350 } 1351 void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1352 vfmasd(0xbd, dst, src1, src2); 1353 } 1354 void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1355 vfmasd(0x9d, dst, src1, src2); 1356 } 1357 void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1358 vfmasd(0xad, dst, src1, src2); 1359 } 1360 void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1361 vfmasd(0xbd, dst, src1, src2); 1362 } 1363 void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1364 vfmasd(0x9f, dst, src1, src2); 1365 } 1366 void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1367 vfmasd(0xaf, dst, src1, src2); 1368 } 1369 void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1370 vfmasd(0xbf, dst, src1, src2); 1371 } 1372 void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1373 vfmasd(0x9f, dst, src1, src2); 1374 } 1375 void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1376 vfmasd(0xaf, dst, src1, src2); 1377 } 1378 void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1379 vfmasd(0xbf, dst, src1, src2); 1380 } 1381 void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2); 1382 void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2); 1383 1384 void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1385 vfmass(0x99, dst, src1, src2); 1386 } 1387 void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1388 vfmass(0xa9, dst, src1, src2); 1389 } 1390 void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1391 vfmass(0xb9, dst, src1, src2); 1392 } 1393 void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1394 vfmass(0x99, dst, src1, src2); 1395 } 1396 void vfmadd213ss(XMMRegister 
dst, XMMRegister src1, const Operand& src2) { 1397 vfmass(0xa9, dst, src1, src2); 1398 } 1399 void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1400 vfmass(0xb9, dst, src1, src2); 1401 } 1402 void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1403 vfmass(0x9b, dst, src1, src2); 1404 } 1405 void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1406 vfmass(0xab, dst, src1, src2); 1407 } 1408 void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1409 vfmass(0xbb, dst, src1, src2); 1410 } 1411 void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1412 vfmass(0x9b, dst, src1, src2); 1413 } 1414 void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1415 vfmass(0xab, dst, src1, src2); 1416 } 1417 void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1418 vfmass(0xbb, dst, src1, src2); 1419 } 1420 void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1421 vfmass(0x9d, dst, src1, src2); 1422 } 1423 void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1424 vfmass(0xad, dst, src1, src2); 1425 } 1426 void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1427 vfmass(0xbd, dst, src1, src2); 1428 } 1429 void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1430 vfmass(0x9d, dst, src1, src2); 1431 } 1432 void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1433 vfmass(0xad, dst, src1, src2); 1434 } 1435 void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1436 vfmass(0xbd, dst, src1, src2); 1437 } 1438 void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1439 vfmass(0x9f, dst, src1, src2); 1440 } 1441 void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1442 vfmass(0xaf, dst, src1, src2); 1443 } 1444 void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 
1445 vfmass(0xbf, dst, src1, src2); 1446 } 1447 void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1448 vfmass(0x9f, dst, src1, src2); 1449 } 1450 void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1451 vfmass(0xaf, dst, src1, src2); 1452 } 1453 void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1454 vfmass(0xbf, dst, src1, src2); 1455 } 1456 void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2); 1457 void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2); 1458 1459 void vmovd(XMMRegister dst, Register src); 1460 void vmovd(XMMRegister dst, const Operand& src); 1461 void vmovd(Register dst, XMMRegister src); 1462 void vmovq(XMMRegister dst, Register src); 1463 void vmovq(XMMRegister dst, const Operand& src); 1464 void vmovq(Register dst, XMMRegister src); 1465 1466 void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1467 vsd(0x10, dst, src1, src2); 1468 } 1469 void vmovsd(XMMRegister dst, const Operand& src) { 1470 vsd(0x10, dst, xmm0, src); 1471 } 1472 void vmovsd(const Operand& dst, XMMRegister src) { 1473 vsd(0x11, src, xmm0, dst); 1474 } 1475 1476 #define AVX_SP_3(instr, opcode) \ 1477 AVX_S_3(instr, opcode) \ 1478 AVX_P_3(instr, opcode) 1479 1480 #define AVX_S_3(instr, opcode) \ 1481 AVX_3(instr##ss, opcode, vss) \ 1482 AVX_3(instr##sd, opcode, vsd) 1483 1484 #define AVX_P_3(instr, opcode) \ 1485 AVX_3(instr##ps, opcode, vps) \ 1486 AVX_3(instr##pd, opcode, vpd) 1487 1488 #define AVX_3(instr, opcode, impl) \ 1489 void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ 1490 impl(opcode, dst, src1, src2); \ 1491 } \ 1492 void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \ 1493 impl(opcode, dst, src1, src2); \ 1494 } 1495 1496 AVX_SP_3(vsqrt, 0x51); 1497 AVX_SP_3(vadd, 0x58); 1498 AVX_SP_3(vsub, 0x5c); 1499 AVX_SP_3(vmul, 0x59); 1500 AVX_SP_3(vdiv, 0x5e); 1501 AVX_SP_3(vmin, 0x5d); 1502 AVX_SP_3(vmax, 
0x5f); 1503 AVX_P_3(vand, 0x54); 1504 AVX_P_3(vor, 0x56); 1505 AVX_P_3(vxor, 0x57); 1506 AVX_3(vcvtsd2ss, 0x5a, vsd); 1507 1508 #undef AVX_3 1509 #undef AVX_S_3 1510 #undef AVX_P_3 1511 #undef AVX_SP_3 1512 1513 void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) { 1514 XMMRegister iop = {2}; 1515 vpd(0x73, iop, dst, src); 1516 emit(imm8); 1517 } 1518 void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) { 1519 XMMRegister iop = {6}; 1520 vpd(0x73, iop, dst, src); 1521 emit(imm8); 1522 } 1523 void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1524 vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG); 1525 } 1526 void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1527 vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG); 1528 } 1529 void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) { 1530 XMMRegister isrc2 = {src2.code()}; 1531 vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0); 1532 } 1533 void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1534 vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0); 1535 } 1536 void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) { 1537 XMMRegister isrc2 = {src2.code()}; 1538 vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0); 1539 } 1540 void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1541 vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0); 1542 } 1543 void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) { 1544 XMMRegister isrc2 = {src2.code()}; 1545 vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1); 1546 } 1547 void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1548 vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1); 1549 } 1550 void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) { 1551 XMMRegister isrc2 = {src2.code()}; 1552 vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1); 1553 } 1554 void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) { 1555 vinstr(0x2a, dst, src1, src2, kF2, k0F, 
kW1); 1556 } 1557 void vcvttss2si(Register dst, XMMRegister src) { 1558 XMMRegister idst = {dst.code()}; 1559 vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0); 1560 } 1561 void vcvttss2si(Register dst, const Operand& src) { 1562 XMMRegister idst = {dst.code()}; 1563 vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0); 1564 } 1565 void vcvttsd2si(Register dst, XMMRegister src) { 1566 XMMRegister idst = {dst.code()}; 1567 vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0); 1568 } 1569 void vcvttsd2si(Register dst, const Operand& src) { 1570 XMMRegister idst = {dst.code()}; 1571 vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0); 1572 } 1573 void vcvttss2siq(Register dst, XMMRegister src) { 1574 XMMRegister idst = {dst.code()}; 1575 vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1); 1576 } 1577 void vcvttss2siq(Register dst, const Operand& src) { 1578 XMMRegister idst = {dst.code()}; 1579 vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1); 1580 } 1581 void vcvttsd2siq(Register dst, XMMRegister src) { 1582 XMMRegister idst = {dst.code()}; 1583 vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1); 1584 } 1585 void vcvttsd2siq(Register dst, const Operand& src) { 1586 XMMRegister idst = {dst.code()}; 1587 vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1); 1588 } 1589 void vcvtsd2si(Register dst, XMMRegister src) { 1590 XMMRegister idst = {dst.code()}; 1591 vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0); 1592 } 1593 void vucomisd(XMMRegister dst, XMMRegister src) { 1594 vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG); 1595 } 1596 void vucomisd(XMMRegister dst, const Operand& src) { 1597 vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG); 1598 } 1599 void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2, 1600 RoundingMode mode) { 1601 vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG); 1602 emit(static_cast<byte>(mode) | 0x8); // Mask precision exception. 
1603 } 1604 void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, 1605 RoundingMode mode) { 1606 vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG); 1607 emit(static_cast<byte>(mode) | 0x8); // Mask precision exception. 1608 } 1609 1610 void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1611 vinstr(op, dst, src1, src2, kF2, k0F, kWIG); 1612 } 1613 void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) { 1614 vinstr(op, dst, src1, src2, kF2, k0F, kWIG); 1615 } 1616 1617 void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) { 1618 vss(0x10, dst, src1, src2); 1619 } 1620 void vmovss(XMMRegister dst, const Operand& src) { 1621 vss(0x10, dst, xmm0, src); 1622 } 1623 void vmovss(const Operand& dst, XMMRegister src) { 1624 vss(0x11, src, xmm0, dst); 1625 } 1626 void vucomiss(XMMRegister dst, XMMRegister src); 1627 void vucomiss(XMMRegister dst, const Operand& src); 1628 void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2); 1629 void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2); 1630 1631 void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); } 1632 void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); } 1633 void vmovups(XMMRegister dst, const Operand& src) { 1634 vps(0x10, dst, xmm0, src); 1635 } 1636 void vmovups(const Operand& dst, XMMRegister src) { 1637 vps(0x11, src, xmm0, dst); 1638 } 1639 void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); } 1640 void vmovupd(XMMRegister dst, const Operand& src) { 1641 vpd(0x10, dst, xmm0, src); 1642 } 1643 void vmovupd(const Operand& dst, XMMRegister src) { 1644 vpd(0x11, src, xmm0, dst); 1645 } 1646 void vmovmskps(Register dst, XMMRegister src) { 1647 XMMRegister idst = {dst.code()}; 1648 vps(0x50, idst, xmm0, src); 1649 } 1650 void vmovmskpd(Register dst, XMMRegister src) { 1651 XMMRegister idst = {dst.code()}; 1652 vpd(0x50, idst, xmm0, src); 1653 } 1654 void 
vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) { 1655 vps(0xC2, dst, src1, src2); 1656 emit(cmp); 1657 } 1658 void vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2, 1659 int8_t cmp) { 1660 vps(0xC2, dst, src1, src2); 1661 emit(cmp); 1662 } 1663 void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) { 1664 vpd(0xC2, dst, src1, src2); 1665 emit(cmp); 1666 } 1667 void vcmppd(XMMRegister dst, XMMRegister src1, const Operand& src2, 1668 int8_t cmp) { 1669 vpd(0xC2, dst, src1, src2); 1670 emit(cmp); 1671 } 1672 1673 #define AVX_CMP_P(instr, imm8) \ 1674 void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ 1675 vcmpps(dst, src1, src2, imm8); \ 1676 } \ 1677 void instr##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \ 1678 vcmpps(dst, src1, src2, imm8); \ 1679 } \ 1680 void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ 1681 vcmppd(dst, src1, src2, imm8); \ 1682 } \ 1683 void instr##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \ 1684 vcmppd(dst, src1, src2, imm8); \ 1685 } 1686 1687 AVX_CMP_P(vcmpeq, 0x0); 1688 AVX_CMP_P(vcmplt, 0x1); 1689 AVX_CMP_P(vcmple, 0x2); 1690 AVX_CMP_P(vcmpneq, 0x4); 1691 AVX_CMP_P(vcmpnlt, 0x5); 1692 AVX_CMP_P(vcmpnle, 0x6); 1693 1694 #undef AVX_CMP_P 1695 1696 void vlddqu(XMMRegister dst, const Operand& src) { 1697 vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG); 1698 } 1699 void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) { 1700 XMMRegister iop = {6}; 1701 vinstr(0x71, iop, dst, src, k66, k0F, kWIG); 1702 emit(imm8); 1703 } 1704 void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) { 1705 XMMRegister iop = {2}; 1706 vinstr(0x71, iop, dst, src, k66, k0F, kWIG); 1707 emit(imm8); 1708 } 1709 void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) { 1710 XMMRegister iop = {4}; 1711 vinstr(0x71, iop, dst, src, k66, k0F, kWIG); 1712 emit(imm8); 1713 } 1714 void vpslld(XMMRegister dst, XMMRegister src, int8_t 
imm8) { 1715 XMMRegister iop = {6}; 1716 vinstr(0x72, iop, dst, src, k66, k0F, kWIG); 1717 emit(imm8); 1718 } 1719 void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) { 1720 XMMRegister iop = {2}; 1721 vinstr(0x72, iop, dst, src, k66, k0F, kWIG); 1722 emit(imm8); 1723 } 1724 void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) { 1725 XMMRegister iop = {4}; 1726 vinstr(0x72, iop, dst, src, k66, k0F, kWIG); 1727 emit(imm8); 1728 } 1729 void vpextrb(Register dst, XMMRegister src, int8_t imm8) { 1730 XMMRegister idst = {dst.code()}; 1731 vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0); 1732 emit(imm8); 1733 } 1734 void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) { 1735 vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0); 1736 emit(imm8); 1737 } 1738 void vpextrw(Register dst, XMMRegister src, int8_t imm8) { 1739 XMMRegister idst = {dst.code()}; 1740 vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0); 1741 emit(imm8); 1742 } 1743 void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) { 1744 vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0); 1745 emit(imm8); 1746 } 1747 void vpextrd(Register dst, XMMRegister src, int8_t imm8) { 1748 XMMRegister idst = {dst.code()}; 1749 vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0); 1750 emit(imm8); 1751 } 1752 void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) { 1753 vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0); 1754 emit(imm8); 1755 } 1756 void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) { 1757 XMMRegister isrc = {src2.code()}; 1758 vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0); 1759 emit(imm8); 1760 } 1761 void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2, 1762 int8_t imm8) { 1763 vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0); 1764 emit(imm8); 1765 } 1766 void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) { 1767 XMMRegister isrc = {src2.code()}; 1768 vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0); 1769 emit(imm8); 1770 } 1771 void 
vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2, 1772 int8_t imm8) { 1773 vinstr(0xc4, dst, src1, src2, k66, k0F, kW0); 1774 emit(imm8); 1775 } 1776 void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) { 1777 XMMRegister isrc = {src2.code()}; 1778 vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0); 1779 emit(imm8); 1780 } 1781 void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2, 1782 int8_t imm8) { 1783 vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0); 1784 emit(imm8); 1785 } 1786 void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) { 1787 vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG); 1788 emit(imm8); 1789 } 1790 1791 void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2); 1792 void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2); 1793 void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2); 1794 void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2); 1795 1796 // BMI instruction 1797 void andnq(Register dst, Register src1, Register src2) { 1798 bmi1q(0xf2, dst, src1, src2); 1799 } 1800 void andnq(Register dst, Register src1, const Operand& src2) { 1801 bmi1q(0xf2, dst, src1, src2); 1802 } 1803 void andnl(Register dst, Register src1, Register src2) { 1804 bmi1l(0xf2, dst, src1, src2); 1805 } 1806 void andnl(Register dst, Register src1, const Operand& src2) { 1807 bmi1l(0xf2, dst, src1, src2); 1808 } 1809 void bextrq(Register dst, Register src1, Register src2) { 1810 bmi1q(0xf7, dst, src2, src1); 1811 } 1812 void bextrq(Register dst, const Operand& src1, Register src2) { 1813 bmi1q(0xf7, dst, src2, src1); 1814 } 1815 void bextrl(Register dst, Register src1, Register src2) { 1816 bmi1l(0xf7, dst, src2, src1); 1817 } 1818 void bextrl(Register dst, const Operand& src1, Register src2) { 1819 bmi1l(0xf7, dst, src2, src1); 1820 } 1821 void blsiq(Register dst, Register src) { 1822 Register ireg = {3}; 1823 bmi1q(0xf3, ireg, dst, src); 1824 } 1825 
void blsiq(Register dst, const Operand& src) { 1826 Register ireg = {3}; 1827 bmi1q(0xf3, ireg, dst, src); 1828 } 1829 void blsil(Register dst, Register src) { 1830 Register ireg = {3}; 1831 bmi1l(0xf3, ireg, dst, src); 1832 } 1833 void blsil(Register dst, const Operand& src) { 1834 Register ireg = {3}; 1835 bmi1l(0xf3, ireg, dst, src); 1836 } 1837 void blsmskq(Register dst, Register src) { 1838 Register ireg = {2}; 1839 bmi1q(0xf3, ireg, dst, src); 1840 } 1841 void blsmskq(Register dst, const Operand& src) { 1842 Register ireg = {2}; 1843 bmi1q(0xf3, ireg, dst, src); 1844 } 1845 void blsmskl(Register dst, Register src) { 1846 Register ireg = {2}; 1847 bmi1l(0xf3, ireg, dst, src); 1848 } 1849 void blsmskl(Register dst, const Operand& src) { 1850 Register ireg = {2}; 1851 bmi1l(0xf3, ireg, dst, src); 1852 } 1853 void blsrq(Register dst, Register src) { 1854 Register ireg = {1}; 1855 bmi1q(0xf3, ireg, dst, src); 1856 } 1857 void blsrq(Register dst, const Operand& src) { 1858 Register ireg = {1}; 1859 bmi1q(0xf3, ireg, dst, src); 1860 } 1861 void blsrl(Register dst, Register src) { 1862 Register ireg = {1}; 1863 bmi1l(0xf3, ireg, dst, src); 1864 } 1865 void blsrl(Register dst, const Operand& src) { 1866 Register ireg = {1}; 1867 bmi1l(0xf3, ireg, dst, src); 1868 } 1869 void tzcntq(Register dst, Register src); 1870 void tzcntq(Register dst, const Operand& src); 1871 void tzcntl(Register dst, Register src); 1872 void tzcntl(Register dst, const Operand& src); 1873 1874 void lzcntq(Register dst, Register src); 1875 void lzcntq(Register dst, const Operand& src); 1876 void lzcntl(Register dst, Register src); 1877 void lzcntl(Register dst, const Operand& src); 1878 1879 void popcntq(Register dst, Register src); 1880 void popcntq(Register dst, const Operand& src); 1881 void popcntl(Register dst, Register src); 1882 void popcntl(Register dst, const Operand& src); 1883 1884 void bzhiq(Register dst, Register src1, Register src2) { 1885 bmi2q(kNone, 0xf5, dst, src2, src1); 1886 } 
1887 void bzhiq(Register dst, const Operand& src1, Register src2) { 1888 bmi2q(kNone, 0xf5, dst, src2, src1); 1889 } 1890 void bzhil(Register dst, Register src1, Register src2) { 1891 bmi2l(kNone, 0xf5, dst, src2, src1); 1892 } 1893 void bzhil(Register dst, const Operand& src1, Register src2) { 1894 bmi2l(kNone, 0xf5, dst, src2, src1); 1895 } 1896 void mulxq(Register dst1, Register dst2, Register src) { 1897 bmi2q(kF2, 0xf6, dst1, dst2, src); 1898 } 1899 void mulxq(Register dst1, Register dst2, const Operand& src) { 1900 bmi2q(kF2, 0xf6, dst1, dst2, src); 1901 } 1902 void mulxl(Register dst1, Register dst2, Register src) { 1903 bmi2l(kF2, 0xf6, dst1, dst2, src); 1904 } 1905 void mulxl(Register dst1, Register dst2, const Operand& src) { 1906 bmi2l(kF2, 0xf6, dst1, dst2, src); 1907 } 1908 void pdepq(Register dst, Register src1, Register src2) { 1909 bmi2q(kF2, 0xf5, dst, src1, src2); 1910 } 1911 void pdepq(Register dst, Register src1, const Operand& src2) { 1912 bmi2q(kF2, 0xf5, dst, src1, src2); 1913 } 1914 void pdepl(Register dst, Register src1, Register src2) { 1915 bmi2l(kF2, 0xf5, dst, src1, src2); 1916 } 1917 void pdepl(Register dst, Register src1, const Operand& src2) { 1918 bmi2l(kF2, 0xf5, dst, src1, src2); 1919 } 1920 void pextq(Register dst, Register src1, Register src2) { 1921 bmi2q(kF3, 0xf5, dst, src1, src2); 1922 } 1923 void pextq(Register dst, Register src1, const Operand& src2) { 1924 bmi2q(kF3, 0xf5, dst, src1, src2); 1925 } 1926 void pextl(Register dst, Register src1, Register src2) { 1927 bmi2l(kF3, 0xf5, dst, src1, src2); 1928 } 1929 void pextl(Register dst, Register src1, const Operand& src2) { 1930 bmi2l(kF3, 0xf5, dst, src1, src2); 1931 } 1932 void sarxq(Register dst, Register src1, Register src2) { 1933 bmi2q(kF3, 0xf7, dst, src2, src1); 1934 } 1935 void sarxq(Register dst, const Operand& src1, Register src2) { 1936 bmi2q(kF3, 0xf7, dst, src2, src1); 1937 } 1938 void sarxl(Register dst, Register src1, Register src2) { 1939 bmi2l(kF3, 0xf7, 
dst, src2, src1); 1940 } 1941 void sarxl(Register dst, const Operand& src1, Register src2) { 1942 bmi2l(kF3, 0xf7, dst, src2, src1); 1943 } 1944 void shlxq(Register dst, Register src1, Register src2) { 1945 bmi2q(k66, 0xf7, dst, src2, src1); 1946 } 1947 void shlxq(Register dst, const Operand& src1, Register src2) { 1948 bmi2q(k66, 0xf7, dst, src2, src1); 1949 } 1950 void shlxl(Register dst, Register src1, Register src2) { 1951 bmi2l(k66, 0xf7, dst, src2, src1); 1952 } 1953 void shlxl(Register dst, const Operand& src1, Register src2) { 1954 bmi2l(k66, 0xf7, dst, src2, src1); 1955 } 1956 void shrxq(Register dst, Register src1, Register src2) { 1957 bmi2q(kF2, 0xf7, dst, src2, src1); 1958 } 1959 void shrxq(Register dst, const Operand& src1, Register src2) { 1960 bmi2q(kF2, 0xf7, dst, src2, src1); 1961 } 1962 void shrxl(Register dst, Register src1, Register src2) { 1963 bmi2l(kF2, 0xf7, dst, src2, src1); 1964 } 1965 void shrxl(Register dst, const Operand& src1, Register src2) { 1966 bmi2l(kF2, 0xf7, dst, src2, src1); 1967 } 1968 void rorxq(Register dst, Register src, byte imm8); 1969 void rorxq(Register dst, const Operand& src, byte imm8); 1970 void rorxl(Register dst, Register src, byte imm8); 1971 void rorxl(Register dst, const Operand& src, byte imm8); 1972 1973 // Check the code size generated from label to here. 1974 int SizeOfCodeGeneratedSince(Label* label) { 1975 return pc_offset() - label->pos(); 1976 } 1977 1978 // Mark address of a debug break slot. 1979 void RecordDebugBreakSlot(RelocInfo::Mode mode); 1980 1981 // Record a comment relocation entry that can be used by a disassembler. 1982 // Use --code-comments to enable. 1983 void RecordComment(const char* msg); 1984 1985 // Record a deoptimization reason that can be used by a log or cpu profiler. 1986 // Use --trace-deopt to enable. 
1987 void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position, 1988 int id); 1989 1990 void PatchConstantPoolAccessInstruction(int pc_offset, int offset, 1991 ConstantPoolEntry::Access access, 1992 ConstantPoolEntry::Type type) { 1993 // No embedded constant pool support. 1994 UNREACHABLE(); 1995 } 1996 1997 void RecordProtectedInstructionLanding(int pc_offset); 1998 1999 // Writes a single word of data in the code stream. 2000 // Used for inline tables, e.g., jump-tables. 2001 void db(uint8_t data); 2002 void dd(uint32_t data); 2003 void dq(uint64_t data); 2004 void dp(uintptr_t data) { dq(data); } 2005 void dq(Label* label); 2006 2007 // Check if there is less than kGap bytes available in the buffer. 2008 // If this is the case, we need to grow the buffer before emitting 2009 // an instruction or relocation information. 2010 inline bool buffer_overflow() const { 2011 return pc_ >= reloc_info_writer.pos() - kGap; 2012 } 2013 2014 // Get the number of bytes available in the buffer. 2015 inline int available_space() const { 2016 return static_cast<int>(reloc_info_writer.pos() - pc_); 2017 } 2018 2019 static bool IsNop(Address addr); 2020 2021 // Avoid overflows for displacements etc. 
2022 static const int kMaximalBufferSize = 512*MB; 2023 2024 byte byte_at(int pos) { return buffer_[pos]; } 2025 void set_byte_at(int pos, byte value) { buffer_[pos] = value; } 2026 2027 Address pc() const { return pc_; } 2028 2029 protected: 2030 // Call near indirect 2031 void call(const Operand& operand); 2032 2033 private: 2034 byte* addr_at(int pos) { return buffer_ + pos; } 2035 uint32_t long_at(int pos) { 2036 return *reinterpret_cast<uint32_t*>(addr_at(pos)); 2037 } 2038 void long_at_put(int pos, uint32_t x) { 2039 *reinterpret_cast<uint32_t*>(addr_at(pos)) = x; 2040 } 2041 2042 // code emission 2043 void GrowBuffer(); 2044 2045 void emit(byte x) { *pc_++ = x; } 2046 inline void emitl(uint32_t x); 2047 inline void emitp(void* x, RelocInfo::Mode rmode); 2048 inline void emitq(uint64_t x); 2049 inline void emitw(uint16_t x); 2050 inline void emit_code_target(Handle<Code> target, 2051 RelocInfo::Mode rmode, 2052 TypeFeedbackId ast_id = TypeFeedbackId::None()); 2053 inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode); 2054 inline void emit(Immediate x); 2055 2056 // Emits a REX prefix that encodes a 64-bit operand size and 2057 // the top bit of both register codes. 2058 // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B. 2059 // REX.W is set. 2060 inline void emit_rex_64(XMMRegister reg, Register rm_reg); 2061 inline void emit_rex_64(Register reg, XMMRegister rm_reg); 2062 inline void emit_rex_64(Register reg, Register rm_reg); 2063 2064 // Emits a REX prefix that encodes a 64-bit operand size and 2065 // the top bit of the destination, index, and base register codes. 2066 // The high bit of reg is used for REX.R, the high bit of op's base 2067 // register is used for REX.B, and the high bit of op's index register 2068 // is used for REX.X. REX.W is set. 
2069 inline void emit_rex_64(Register reg, const Operand& op); 2070 inline void emit_rex_64(XMMRegister reg, const Operand& op); 2071 2072 // Emits a REX prefix that encodes a 64-bit operand size and 2073 // the top bit of the register code. 2074 // The high bit of register is used for REX.B. 2075 // REX.W is set and REX.R and REX.X are clear. 2076 inline void emit_rex_64(Register rm_reg); 2077 2078 // Emits a REX prefix that encodes a 64-bit operand size and 2079 // the top bit of the index and base register codes. 2080 // The high bit of op's base register is used for REX.B, and the high 2081 // bit of op's index register is used for REX.X. 2082 // REX.W is set and REX.R clear. 2083 inline void emit_rex_64(const Operand& op); 2084 2085 // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size. 2086 void emit_rex_64() { emit(0x48); } 2087 2088 // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B. 2089 // REX.W is clear. 2090 inline void emit_rex_32(Register reg, Register rm_reg); 2091 2092 // The high bit of reg is used for REX.R, the high bit of op's base 2093 // register is used for REX.B, and the high bit of op's index register 2094 // is used for REX.X. REX.W is cleared. 2095 inline void emit_rex_32(Register reg, const Operand& op); 2096 2097 // High bit of rm_reg goes to REX.B. 2098 // REX.W, REX.R and REX.X are clear. 2099 inline void emit_rex_32(Register rm_reg); 2100 2101 // High bit of base goes to REX.B and high bit of index to REX.X. 2102 // REX.W and REX.R are clear. 2103 inline void emit_rex_32(const Operand& op); 2104 2105 // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B. 2106 // REX.W is cleared. If no REX bits are set, no byte is emitted. 2107 inline void emit_optional_rex_32(Register reg, Register rm_reg); 2108 2109 // The high bit of reg is used for REX.R, the high bit of op's base 2110 // register is used for REX.B, and the high bit of op's index register 2111 // is used for REX.X. 
REX.W is cleared. If no REX bits are set, nothing 2112 // is emitted. 2113 inline void emit_optional_rex_32(Register reg, const Operand& op); 2114 2115 // As for emit_optional_rex_32(Register, Register), except that 2116 // the registers are XMM registers. 2117 inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base); 2118 2119 // As for emit_optional_rex_32(Register, Register), except that 2120 // one of the registers is an XMM registers. 2121 inline void emit_optional_rex_32(XMMRegister reg, Register base); 2122 2123 // As for emit_optional_rex_32(Register, Register), except that 2124 // one of the registers is an XMM registers. 2125 inline void emit_optional_rex_32(Register reg, XMMRegister base); 2126 2127 // As for emit_optional_rex_32(Register, const Operand&), except that 2128 // the register is an XMM register. 2129 inline void emit_optional_rex_32(XMMRegister reg, const Operand& op); 2130 2131 // Optionally do as emit_rex_32(Register) if the register number has 2132 // the high bit set. 2133 inline void emit_optional_rex_32(Register rm_reg); 2134 inline void emit_optional_rex_32(XMMRegister rm_reg); 2135 2136 // Optionally do as emit_rex_32(const Operand&) if the operand register 2137 // numbers have a high bit set. 
2138 inline void emit_optional_rex_32(const Operand& op); 2139 2140 void emit_rex(int size) { 2141 if (size == kInt64Size) { 2142 emit_rex_64(); 2143 } else { 2144 DCHECK(size == kInt32Size); 2145 } 2146 } 2147 2148 template<class P1> 2149 void emit_rex(P1 p1, int size) { 2150 if (size == kInt64Size) { 2151 emit_rex_64(p1); 2152 } else { 2153 DCHECK(size == kInt32Size); 2154 emit_optional_rex_32(p1); 2155 } 2156 } 2157 2158 template<class P1, class P2> 2159 void emit_rex(P1 p1, P2 p2, int size) { 2160 if (size == kInt64Size) { 2161 emit_rex_64(p1, p2); 2162 } else { 2163 DCHECK(size == kInt32Size); 2164 emit_optional_rex_32(p1, p2); 2165 } 2166 } 2167 2168 // Emit vex prefix 2169 void emit_vex2_byte0() { emit(0xc5); } 2170 inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l, 2171 SIMDPrefix pp); 2172 void emit_vex3_byte0() { emit(0xc4); } 2173 inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m); 2174 inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm, 2175 LeadingOpcode m); 2176 inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l, 2177 SIMDPrefix pp); 2178 inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm, 2179 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2180 VexW w); 2181 inline void emit_vex_prefix(Register reg, Register v, Register rm, 2182 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2183 VexW w); 2184 inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm, 2185 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2186 VexW w); 2187 inline void emit_vex_prefix(Register reg, Register v, const Operand& rm, 2188 VectorLength l, SIMDPrefix pp, LeadingOpcode m, 2189 VexW w); 2190 2191 // Emit the ModR/M byte, and optionally the SIB byte and 2192 // 1- or 4-byte offset for a memory operand. Also encodes 2193 // the second operand of the operation, a register or operation 2194 // subcode, into the reg field of the ModR/M byte. 
2195 void emit_operand(Register reg, const Operand& adr) { 2196 emit_operand(reg.low_bits(), adr); 2197 } 2198 2199 // Emit the ModR/M byte, and optionally the SIB byte and 2200 // 1- or 4-byte offset for a memory operand. Also used to encode 2201 // a three-bit opcode extension into the ModR/M byte. 2202 void emit_operand(int rm, const Operand& adr); 2203 2204 // Emit a ModR/M byte with registers coded in the reg and rm_reg fields. 2205 void emit_modrm(Register reg, Register rm_reg) { 2206 emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits()); 2207 } 2208 2209 // Emit a ModR/M byte with an operation subcode in the reg field and 2210 // a register in the rm_reg field. 2211 void emit_modrm(int code, Register rm_reg) { 2212 DCHECK(is_uint3(code)); 2213 emit(0xC0 | code << 3 | rm_reg.low_bits()); 2214 } 2215 2216 // Emit the code-object-relative offset of the label's position 2217 inline void emit_code_relative_offset(Label* label); 2218 2219 // The first argument is the reg field, the second argument is the r/m field. 2220 void emit_sse_operand(XMMRegister dst, XMMRegister src); 2221 void emit_sse_operand(XMMRegister reg, const Operand& adr); 2222 void emit_sse_operand(Register reg, const Operand& adr); 2223 void emit_sse_operand(XMMRegister dst, Register src); 2224 void emit_sse_operand(Register dst, XMMRegister src); 2225 void emit_sse_operand(XMMRegister dst); 2226 2227 // Emit machine code for one of the operations ADD, ADC, SUB, SBC, 2228 // AND, OR, XOR, or CMP. The encodings of these operations are all 2229 // similar, differing just in the opcode or in the reg field of the 2230 // ModR/M byte. 
2231 void arithmetic_op_8(byte opcode, Register reg, Register rm_reg); 2232 void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg); 2233 void arithmetic_op_16(byte opcode, Register reg, Register rm_reg); 2234 void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg); 2235 // Operate on operands/registers with pointer size, 32-bit or 64-bit size. 2236 void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size); 2237 void arithmetic_op(byte opcode, 2238 Register reg, 2239 const Operand& rm_reg, 2240 int size); 2241 // Operate on a byte in memory or register. 2242 void immediate_arithmetic_op_8(byte subcode, 2243 Register dst, 2244 Immediate src); 2245 void immediate_arithmetic_op_8(byte subcode, 2246 const Operand& dst, 2247 Immediate src); 2248 // Operate on a word in memory or register. 2249 void immediate_arithmetic_op_16(byte subcode, 2250 Register dst, 2251 Immediate src); 2252 void immediate_arithmetic_op_16(byte subcode, 2253 const Operand& dst, 2254 Immediate src); 2255 // Operate on operands/registers with pointer size, 32-bit or 64-bit size. 2256 void immediate_arithmetic_op(byte subcode, 2257 Register dst, 2258 Immediate src, 2259 int size); 2260 void immediate_arithmetic_op(byte subcode, 2261 const Operand& dst, 2262 Immediate src, 2263 int size); 2264 2265 // Emit machine code for a shift operation. 2266 void shift(Operand dst, Immediate shift_amount, int subcode, int size); 2267 void shift(Register dst, Immediate shift_amount, int subcode, int size); 2268 // Shift dst by cl % 64 bits. 
2269 void shift(Register dst, int subcode, int size); 2270 void shift(Operand dst, int subcode, int size); 2271 2272 void emit_farith(int b1, int b2, int i); 2273 2274 // labels 2275 // void print(Label* L); 2276 void bind_to(Label* L, int pos); 2277 2278 // record reloc info for current pc_ 2279 void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0); 2280 2281 // Arithmetics 2282 void emit_add(Register dst, Register src, int size) { 2283 arithmetic_op(0x03, dst, src, size); 2284 } 2285 2286 void emit_add(Register dst, Immediate src, int size) { 2287 immediate_arithmetic_op(0x0, dst, src, size); 2288 } 2289 2290 void emit_add(Register dst, const Operand& src, int size) { 2291 arithmetic_op(0x03, dst, src, size); 2292 } 2293 2294 void emit_add(const Operand& dst, Register src, int size) { 2295 arithmetic_op(0x1, src, dst, size); 2296 } 2297 2298 void emit_add(const Operand& dst, Immediate src, int size) { 2299 immediate_arithmetic_op(0x0, dst, src, size); 2300 } 2301 2302 void emit_and(Register dst, Register src, int size) { 2303 arithmetic_op(0x23, dst, src, size); 2304 } 2305 2306 void emit_and(Register dst, const Operand& src, int size) { 2307 arithmetic_op(0x23, dst, src, size); 2308 } 2309 2310 void emit_and(const Operand& dst, Register src, int size) { 2311 arithmetic_op(0x21, src, dst, size); 2312 } 2313 2314 void emit_and(Register dst, Immediate src, int size) { 2315 immediate_arithmetic_op(0x4, dst, src, size); 2316 } 2317 2318 void emit_and(const Operand& dst, Immediate src, int size) { 2319 immediate_arithmetic_op(0x4, dst, src, size); 2320 } 2321 2322 void emit_cmp(Register dst, Register src, int size) { 2323 arithmetic_op(0x3B, dst, src, size); 2324 } 2325 2326 void emit_cmp(Register dst, const Operand& src, int size) { 2327 arithmetic_op(0x3B, dst, src, size); 2328 } 2329 2330 void emit_cmp(const Operand& dst, Register src, int size) { 2331 arithmetic_op(0x39, src, dst, size); 2332 } 2333 2334 void emit_cmp(Register dst, Immediate src, int 
size) { 2335 immediate_arithmetic_op(0x7, dst, src, size); 2336 } 2337 2338 void emit_cmp(const Operand& dst, Immediate src, int size) { 2339 immediate_arithmetic_op(0x7, dst, src, size); 2340 } 2341 2342 // Compare {al,ax,eax,rax} with src. If equal, set ZF and write dst into 2343 // src. Otherwise clear ZF and write src into {al,ax,eax,rax}. This 2344 // operation is only atomic if prefixed by the lock instruction. 2345 void emit_cmpxchg(const Operand& dst, Register src, int size); 2346 2347 void emit_dec(Register dst, int size); 2348 void emit_dec(const Operand& dst, int size); 2349 2350 // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64. 2351 // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx 2352 // when size is 32. 2353 void emit_idiv(Register src, int size); 2354 void emit_div(Register src, int size); 2355 2356 // Signed multiply instructions. 2357 // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32. 2358 void emit_imul(Register src, int size); 2359 void emit_imul(const Operand& src, int size); 2360 void emit_imul(Register dst, Register src, int size); 2361 void emit_imul(Register dst, const Operand& src, int size); 2362 void emit_imul(Register dst, Register src, Immediate imm, int size); 2363 void emit_imul(Register dst, const Operand& src, Immediate imm, int size); 2364 2365 void emit_inc(Register dst, int size); 2366 void emit_inc(const Operand& dst, int size); 2367 2368 void emit_lea(Register dst, const Operand& src, int size); 2369 2370 void emit_mov(Register dst, const Operand& src, int size); 2371 void emit_mov(Register dst, Register src, int size); 2372 void emit_mov(const Operand& dst, Register src, int size); 2373 void emit_mov(Register dst, Immediate value, int size); 2374 void emit_mov(const Operand& dst, Immediate value, int size); 2375 2376 void emit_movzxb(Register dst, const Operand& src, int size); 2377 void emit_movzxb(Register dst, Register src, int size); 2378 
void emit_movzxw(Register dst, const Operand& src, int size); 2379 void emit_movzxw(Register dst, Register src, int size); 2380 2381 void emit_neg(Register dst, int size); 2382 void emit_neg(const Operand& dst, int size); 2383 2384 void emit_not(Register dst, int size); 2385 void emit_not(const Operand& dst, int size); 2386 2387 void emit_or(Register dst, Register src, int size) { 2388 arithmetic_op(0x0B, dst, src, size); 2389 } 2390 2391 void emit_or(Register dst, const Operand& src, int size) { 2392 arithmetic_op(0x0B, dst, src, size); 2393 } 2394 2395 void emit_or(const Operand& dst, Register src, int size) { 2396 arithmetic_op(0x9, src, dst, size); 2397 } 2398 2399 void emit_or(Register dst, Immediate src, int size) { 2400 immediate_arithmetic_op(0x1, dst, src, size); 2401 } 2402 2403 void emit_or(const Operand& dst, Immediate src, int size) { 2404 immediate_arithmetic_op(0x1, dst, src, size); 2405 } 2406 2407 void emit_repmovs(int size); 2408 2409 void emit_sbb(Register dst, Register src, int size) { 2410 arithmetic_op(0x1b, dst, src, size); 2411 } 2412 2413 void emit_sub(Register dst, Register src, int size) { 2414 arithmetic_op(0x2B, dst, src, size); 2415 } 2416 2417 void emit_sub(Register dst, Immediate src, int size) { 2418 immediate_arithmetic_op(0x5, dst, src, size); 2419 } 2420 2421 void emit_sub(Register dst, const Operand& src, int size) { 2422 arithmetic_op(0x2B, dst, src, size); 2423 } 2424 2425 void emit_sub(const Operand& dst, Register src, int size) { 2426 arithmetic_op(0x29, src, dst, size); 2427 } 2428 2429 void emit_sub(const Operand& dst, Immediate src, int size) { 2430 immediate_arithmetic_op(0x5, dst, src, size); 2431 } 2432 2433 void emit_test(Register dst, Register src, int size); 2434 void emit_test(Register reg, Immediate mask, int size); 2435 void emit_test(const Operand& op, Register reg, int size); 2436 void emit_test(const Operand& op, Immediate mask, int size); 2437 void emit_test(Register reg, const Operand& op, int size) { 2438 
return emit_test(op, reg, size); 2439 } 2440 2441 void emit_xchg(Register dst, Register src, int size); 2442 void emit_xchg(Register dst, const Operand& src, int size); 2443 2444 void emit_xor(Register dst, Register src, int size) { 2445 if (size == kInt64Size && dst.code() == src.code()) { 2446 // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore 2447 // there is no need to make this a 64 bit operation. 2448 arithmetic_op(0x33, dst, src, kInt32Size); 2449 } else { 2450 arithmetic_op(0x33, dst, src, size); 2451 } 2452 } 2453 2454 void emit_xor(Register dst, const Operand& src, int size) { 2455 arithmetic_op(0x33, dst, src, size); 2456 } 2457 2458 void emit_xor(Register dst, Immediate src, int size) { 2459 immediate_arithmetic_op(0x6, dst, src, size); 2460 } 2461 2462 void emit_xor(const Operand& dst, Immediate src, int size) { 2463 immediate_arithmetic_op(0x6, dst, src, size); 2464 } 2465 2466 void emit_xor(const Operand& dst, Register src, int size) { 2467 arithmetic_op(0x31, src, dst, size); 2468 } 2469 2470 // Most BMI instructions are similiar. 
2471 void bmi1q(byte op, Register reg, Register vreg, Register rm); 2472 void bmi1q(byte op, Register reg, Register vreg, const Operand& rm); 2473 void bmi1l(byte op, Register reg, Register vreg, Register rm); 2474 void bmi1l(byte op, Register reg, Register vreg, const Operand& rm); 2475 void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm); 2476 void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, 2477 const Operand& rm); 2478 void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm); 2479 void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, 2480 const Operand& rm); 2481 2482 friend class CodePatcher; 2483 friend class EnsureSpace; 2484 friend class RegExpMacroAssemblerX64; 2485 2486 // code generation 2487 RelocInfoWriter reloc_info_writer; 2488 2489 // Internal reference positions, required for (potential) patching in 2490 // GrowBuffer(); contains only those internal references whose labels 2491 // are already bound. 2492 std::deque<int> internal_reference_positions_; 2493 2494 List< Handle<Code> > code_targets_; 2495 }; 2496 2497 2498 // Helper class that ensures that there is enough space for generating 2499 // instructions and relocation information. The constructor makes 2500 // sure that there is enough space and (in debug mode) the destructor 2501 // checks that we did not generate too much. 
2502 class EnsureSpace BASE_EMBEDDED { 2503 public: 2504 explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) { 2505 if (assembler_->buffer_overflow()) assembler_->GrowBuffer(); 2506 #ifdef DEBUG 2507 space_before_ = assembler_->available_space(); 2508 #endif 2509 } 2510 2511 #ifdef DEBUG 2512 ~EnsureSpace() { 2513 int bytes_generated = space_before_ - assembler_->available_space(); 2514 DCHECK(bytes_generated < assembler_->kGap); 2515 } 2516 #endif 2517 2518 private: 2519 Assembler* assembler_; 2520 #ifdef DEBUG 2521 int space_before_; 2522 #endif 2523 }; 2524 2525 } // namespace internal 2526 } // namespace v8 2527 2528 #endif // V8_X64_ASSEMBLER_X64_H_ 2529