1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 /** 18 * @author Alexander V. Astapchuk 19 */ 20 21 /** 22 * @file 23 * @brief Main encoding routines and structures. 24 */ 25 26 #ifndef __ENC_BASE_H_INCLUDED__ 27 #define __ENC_BASE_H_INCLUDED__ 28 29 #include "enc_defs.h" 30 31 32 #include <stdlib.h> 33 #include <assert.h> 34 #include <memory.h> 35 36 ENCODER_NAMESPACE_START 37 struct MnemonicInfo; 38 struct OpcodeInfo; 39 struct Rex; 40 41 /** 42 * @brief Basic facilities for generation of processor's instructions. 43 * 44 * The class EncoderBase represents the basic facilities for the encoding of 45 * processor's instructions on IA32 and EM64T platforms. 46 * 47 * The class provides general interface to generate the instructions as well 48 * as to retrieve some static data about instructions (number of arguments, 49 * their roles, etc). 50 * 51 * Currently, the EncoderBase class is used for both LIL and Jitrino code 52 * generators. Each of these code generators has its own wrapper to adapt 53 * this general interface for specific needs - see encoder.h for LIL wrappers 54 * and Ia32Encoder.h for Jitrino's adapter. 55 * 56 * Interface is provided through static methods, no instances of EncoderBase 57 * to be created. 58 * 59 * @todo RIP-based addressing on EM64T - it's not yet supported currently. 60 */ 61 class EncoderBase { 62 public: 63 class Operands; 64 struct MnemonicDesc; 65 /** 66 * @brief Generates processor's instruction. 67 * 68 * @param stream - a buffer to generate into 69 * @param mn - \link Mnemonic mnemonic \endlink of the instruction 70 * @param opnds - operands for the instruction 71 * @returns (stream + length of the just generated instruction) 72 */ 73 static char * encode(char * stream, Mnemonic mn, const Operands& opnds); 74 static char * getOpndLocation(int index); 75 76 /** 77 * @brief Generates the smallest possible number of NOP-s. 78 * 79 * Effectively generates the smallest possible number of instructions, 80 * which are NOP-s for CPU. Normally used to make a code alignment. 81 * 82 * The method inserts exactly number of bytes specified. It's a caller's 83 * responsibility to make sure the buffer is big enough. 84 * 85 * @param stream - buffer where to generate code into, can not be NULL 86 * @param howMany - how many bytes to fill with NOP-s 87 * @return \c (stream+howMany) 88 */ 89 static char * nops(char * stream, unsigned howMany); 90 91 /** 92 * @brief Inserts a prefix into the code buffer. 93 * 94 * The method writes no more than one byte into the buffer. This is a 95 * caller's responsibility to make sure the buffer is big enough. 96 * 97 * @param stream - buffer where to insert the prefix 98 * @param pref - prefix to be inserted. If it's InstPrefix_Null, then 99 * no action performed and return value is \c stream. 100 * @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream 101 * otherwise 102 */ 103 static char * prefix(char* stream, InstPrefix pref); 104 105 /** 106 * @brief Determines if operand with opndExt suites the position with instExt. 107 */ 108 static bool extAllowed(OpndExt opndExt, OpndExt instExt); 109 110 /** 111 * @brief Returns #MnemonicDesc by the given Mnemonic. 112 */ 113 static const MnemonicDesc * getMnemonicDesc(Mnemonic mn) 114 { 115 assert(mn < Mnemonic_Count); 116 return mnemonics + mn; 117 } 118 119 /** 120 * @brief Returns a Mnemonic for the given name. 121 * 122 * The lookup is case insensitive, if no mnemonic found for the given 123 * string, then Mnemonic_Null returned. 124 */ 125 static Mnemonic str2mnemonic(const char * mn_name); 126 127 /** 128 * @brief Returns a string representation of the given Mnemonic. 129 * 130 * If invalid mnemonic passed, then the behavior is unpredictable. 131 */ 132 static const char * getMnemonicString(Mnemonic mn) 133 { 134 return getMnemonicDesc(mn)->name; 135 } 136 137 static const char * toStr(Mnemonic mn) 138 { 139 return getMnemonicDesc(mn)->name; 140 } 141 142 143 /** 144 * @brief Description of operand. 145 * 146 * Description of an operand in opcode - its kind, size or RegName if 147 * operand must be a particular register. 148 */ 149 struct OpndDesc { 150 /** 151 * @brief Location of the operand. 152 * 153 * May be a mask, i.e. OpndKind_Imm|OpndKind_Mem. 154 */ 155 OpndKind kind; 156 /** 157 * @brief Size of the operand. 158 */ 159 OpndSize size; 160 /** 161 * @brief Extention of the operand. 162 */ 163 OpndExt ext; 164 /** 165 * @brief Appropriate RegName if operand must reside on a particular 166 * register (i.e. CWD/CDQ instructions), RegName_Null 167 * otherwise. 168 */ 169 RegName reg; 170 }; 171 172 /** 173 * @brief Description of operands' roles in instruction. 174 */ 175 struct OpndRolesDesc { 176 /** 177 * @brief Total number of operands in the operation. 178 */ 179 unsigned count; 180 /** 181 * @brief Number of defs in the operation. 182 */ 183 unsigned defCount; 184 /** 185 * @brief Number of uses in the operation. 186 */ 187 unsigned useCount; 188 /** 189 * @brief Operand roles, bit-packed. 190 * 191 * A bit-packed info about operands' roles. Each operand's role is 192 * described by two bits, counted from right-to-left - the less 193 * significant bits (0,1) represent operand#0. 194 * 195 * The mask is build by ORing #OpndRole_Def and #OpndRole_Use 196 * appropriately and shifting left, i.e. operand#0's role would be 197 * - '(OpndRole_Def|OpndRole_Use)' 198 * - opnd#1's role would be 'OpndRole_Use<<2' 199 * - and operand#2's role would be, say, 'OpndRole_Def<<4'. 200 */ 201 unsigned roles; 202 }; 203 204 /** 205 * @brief Extracts appropriate OpndRole for a given operand. 206 * 207 * The order of operands is left-to-right, i.e. for MOV, it 208 * would be 'MOV op0, op1' 209 */ 210 static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx) 211 { 212 assert(idx < ord.count); 213 return (OpndRole)(ord.roles>>((ord.count-1-idx)*2) & 0x3); 214 } 215 216 /** 217 * @brief Info about single opcode - its opcode bytes, operands, 218 * operands' roles. 219 */ 220 union OpcodeDesc { 221 char dummy[128]; // To make total size a power of 2 222 223 struct { 224 /** 225 * @brief Raw opcode bytes. 226 * 227 * 'Raw' opcode bytes which do not require any analysis and are 228 * independent from arguments/sizes/etc (may include opcode size 229 * prefix). 230 */ 231 char opcode[5]; 232 unsigned opcode_len; 233 unsigned aux0; 234 unsigned aux1; 235 /** 236 * @brief Info about opcode's operands. 237 * 238 * The [3] mostly comes from IDIV/IMUL which both may have up to 3 239 * operands. 240 */ 241 OpndDesc opnds[3]; 242 unsigned first_opnd; 243 /** 244 * @brief Info about operands - total number, number of uses/defs, 245 * operands' roles. 246 */ 247 OpndRolesDesc roles; 248 /** 249 * @brief If not zero, then this is final OpcodeDesc structure in 250 * the list of opcodes for a given mnemonic. 251 */ 252 char last; 253 char platf; 254 }; 255 }; 256 public: 257 /** 258 * @brief General info about mnemonic. 259 */ 260 struct MnemonicDesc { 261 /** 262 * @brief The mnemonic itself. 263 */ 264 Mnemonic mn; 265 /** 266 * Various characteristics of mnemonic. 267 * @see MF_ 268 */ 269 unsigned flags; 270 /** 271 * @brief Operation's operand's count and roles. 272 * 273 * For the operations whose opcodes may use different number of 274 * operands (i.e. IMUL/SHL) either most common value used, or empty 275 * value left. 276 */ 277 OpndRolesDesc roles; 278 /** 279 * @brief Print name of the mnemonic. 280 */ 281 const char * name; 282 }; 283 284 285 /** 286 * @brief Magic number, shows a maximum value a hash code can take. 287 * 288 * For meaning and arithmetics see enc_tabl.cpp. 289 * 290 * The value was increased from '5155' to '8192' to make it aligned 291 * for faster access in EncoderBase::lookup(). 292 */ 293 static const unsigned int HASH_MAX = 8192; //5155; 294 /** 295 * @brief Empty value, used in hash-to-opcode map to show an empty slot. 296 */ 297 static const unsigned char NOHASH = 0xFF; 298 /** 299 * @brief The name says it all. 300 */ 301 static const unsigned char HASH_BITS_PER_OPERAND = 5; 302 303 /** 304 * @brief Contains info about a single instructions's operand - its 305 * location, size and a value for immediate or RegName for 306 * register operands. 307 */ 308 class Operand { 309 public: 310 /** 311 * @brief Initializes the instance with empty size and kind. 312 */ 313 Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {} 314 /** 315 * @brief Creates register operand from given RegName. 316 */ 317 Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)), 318 m_size(getRegSize(reg)), 319 m_ext(ext), m_reg(reg) 320 { 321 hash_it(); 322 } 323 /** 324 * @brief Creates register operand from given RegName and with the 325 * specified size and kind. 326 * 327 * Used to speedup Operand creation as there is no need to extract 328 * size and kind from the RegName. 329 * The provided size and kind must match the RegName's ones though. 330 */ 331 Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) : 332 m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg) 333 { 334 assert(m_size == getRegSize(reg)); 335 assert(m_kind == getRegKind(reg)); 336 hash_it(); 337 } 338 /** 339 * @brief Creates immediate operand with the given size and value. 340 */ 341 Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) : 342 m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival) 343 { 344 hash_it(); 345 } 346 /** 347 * @brief Creates immediate operand of OpndSize_32. 348 */ 349 Operand(int ival, OpndExt ext = OpndExt_None) : 350 m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival) 351 { 352 hash_it(); 353 } 354 /** 355 * @brief Creates immediate operand of OpndSize_16. 356 */ 357 Operand(short ival, OpndExt ext = OpndExt_None) : 358 m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival) 359 { 360 hash_it(); 361 } 362 363 /** 364 * @brief Creates immediate operand of OpndSize_8. 365 */ 366 Operand(char ival, OpndExt ext = OpndExt_None) : 367 m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival) 368 { 369 hash_it(); 370 } 371 372 /** 373 * @brief Creates memory operand. 374 */ 375 Operand(OpndSize size, RegName base, RegName index, unsigned scale, 376 int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext) 377 { 378 m_base = base; 379 m_index = index; 380 m_scale = scale; 381 m_disp = disp; 382 hash_it(); 383 } 384 385 /** 386 * @brief Creates memory operand with only base and displacement. 387 */ 388 Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) : 389 m_kind(OpndKind_Mem), m_size(size), m_ext(ext) 390 { 391 m_base = base; 392 m_index = RegName_Null; 393 m_scale = 0; 394 m_disp = disp; 395 hash_it(); 396 } 397 // 398 // general info 399 // 400 /** 401 * @brief Returns kind of the operand. 402 */ 403 OpndKind kind(void) const { return m_kind; } 404 /** 405 * @brief Returns size of the operand. 406 */ 407 OpndSize size(void) const { return m_size; } 408 /** 409 * @brief Returns extention of the operand. 410 */ 411 OpndExt ext(void) const { return m_ext; } 412 /** 413 * @brief Returns hash of the operand. 414 */ 415 unsigned hash(void) const { return m_hash; } 416 // 417 #ifdef _EM64T_ 418 bool need_rex(void) const { return m_need_rex; } 419 #else 420 bool need_rex(void) const { return false; } 421 #endif 422 /** 423 * @brief Tests whether operand is memory operand. 424 */ 425 bool is_mem(void) const { return is_placed_in(OpndKind_Mem); } 426 /** 427 * @brief Tests whether operand is immediate operand. 428 */ 429 bool is_imm(void) const { return is_placed_in(OpndKind_Imm); } 430 /** 431 * @brief Tests whether operand is register operand. 432 */ 433 bool is_reg(void) const { return is_placed_in(OpndKind_Reg); } 434 /** 435 * @brief Tests whether operand is general-purpose register operand. 436 */ 437 bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); } 438 /** 439 * @brief Tests whether operand is float-point pseudo-register operand. 440 */ 441 bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); } 442 /** 443 * @brief Tests whether operand is XMM register operand. 444 */ 445 bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); } 446 #ifdef _HAVE_MMX_ 447 /** 448 * @brief Tests whether operand is MMX register operand. 449 */ 450 bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); } 451 #endif 452 /** 453 * @brief Tests whether operand is signed immediate operand. 454 */ 455 //bool is_signed(void) const { assert(is_imm()); return m_is_signed; } 456 457 /** 458 * @brief Returns base of memory operand (RegName_Null if not memory). 459 */ 460 RegName base(void) const { return is_mem() ? m_base : RegName_Null; } 461 /** 462 * @brief Returns index of memory operand (RegName_Null if not memory). 463 */ 464 RegName index(void) const { return is_mem() ? m_index : RegName_Null; } 465 /** 466 * @brief Returns scale of memory operand (0 if not memory). 467 */ 468 unsigned scale(void) const { return is_mem() ? m_scale : 0; } 469 /** 470 * @brief Returns displacement of memory operand (0 if not memory). 471 */ 472 int disp(void) const { return is_mem() ? m_disp : 0; } 473 /** 474 * @brief Returns RegName of register operand (RegName_Null if not 475 * register). 476 */ 477 RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; } 478 /** 479 * @brief Returns value of immediate operand (0 if not immediate). 480 */ 481 long long imm(void) const { return is_imm() ? m_imm64 : 0; } 482 private: 483 bool is_placed_in(OpndKind kd) const 484 { 485 return kd == OpndKind_Reg ? 486 m_kind == OpndKind_GPReg || 487 #ifdef _HAVE_MMX_ 488 m_kind == OpndKind_MMXReg || 489 #endif 490 m_kind == OpndKind_FPReg || 491 m_kind == OpndKind_XMMReg 492 : kd == m_kind; 493 } 494 void hash_it(void) 495 { 496 m_hash = get_size_hash(m_size) | get_kind_hash(m_kind); 497 #ifdef _EM64T_ 498 m_need_rex = false; 499 if (is_reg() && is_em64t_extra_reg(m_reg)) { 500 m_need_rex = true; 501 } 502 else if (is_mem() && (is_em64t_extra_reg(m_base) || 503 is_em64t_extra_reg(m_index))) { 504 m_need_rex = true; 505 } 506 #endif 507 } 508 // general info 509 OpndKind m_kind; 510 OpndSize m_size; 511 OpndExt m_ext; 512 // complex address form support 513 RegName m_base; 514 RegName m_index; 515 unsigned m_scale; 516 union { 517 int m_disp; 518 RegName m_reg; 519 long long m_imm64; 520 }; 521 unsigned m_hash; 522 bool m_need_rex; 523 friend class EncoderBase::Operands; 524 }; 525 /** 526 * @brief Simple container for up to 3 Operand-s. 527 */ 528 class Operands { 529 public: 530 Operands(void) 531 { 532 clear(); 533 } 534 Operands(const Operand& op0) 535 { 536 clear(); 537 add(op0); 538 } 539 540 Operands(const Operand& op0, const Operand& op1) 541 { 542 clear(); 543 add(op0); add(op1); 544 } 545 546 Operands(const Operand& op0, const Operand& op1, const Operand& op2) 547 { 548 clear(); 549 add(op0); add(op1); add(op2); 550 } 551 552 unsigned count(void) const { return m_count; } 553 unsigned hash(void) const { return m_hash; } 554 const Operand& operator[](unsigned idx) const 555 { 556 assert(idx<m_count); 557 return m_operands[idx]; 558 } 559 560 void add(const Operand& op) 561 { 562 assert(m_count < COUNTOF(m_operands)); 563 m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash(); 564 m_operands[m_count++] = op; 565 m_need_rex = m_need_rex || op.m_need_rex; 566 } 567 #ifdef _EM64T_ 568 bool need_rex(void) const { return m_need_rex; } 569 #else 570 bool need_rex(void) const { return false; } 571 #endif 572 void clear(void) 573 { 574 m_count = 0; m_hash = 0; m_need_rex = false; 575 } 576 private: 577 unsigned m_count; 578 Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )]; 579 unsigned m_hash; 580 bool m_need_rex; 581 }; 582 public: 583 #ifdef _DEBUG 584 /** 585 * Verifies some presumptions about encoding data table. 586 * Called automaticaly during statics initialization. 587 */ 588 static int verify(void); 589 #endif 590 591 private: 592 /** 593 * @brief Returns found OpcodeDesc by the given Mnemonic and operands. 594 */ 595 static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds); 596 /** 597 * @brief Encodes mod/rm byte. 598 */ 599 static char* encodeModRM(char* stream, const Operands& opnds, 600 unsigned idx, const OpcodeDesc * odesc, Rex * prex); 601 /** 602 * @brief Encodes special things of opcode description - '/r', 'ib', etc. 603 */ 604 static char* encode_aux(char* stream, unsigned aux, 605 const Operands& opnds, const OpcodeDesc * odesc, 606 unsigned * pargsCount, Rex* prex); 607 #ifdef _EM64T_ 608 /** 609 * @brief Returns true if the 'reg' argument represents one of the new 610 * EM64T registers - R8(D)-R15(D). 611 * 612 * The 64 bits versions of 'old-fashion' registers, i.e. RAX are not 613 * considered as 'extra'. 614 */ 615 static bool is_em64t_extra_reg(const RegName reg) 616 { 617 if (needs_rex_r(reg)) { 618 return true; 619 } 620 if (RegName_SPL <= reg && reg <= RegName_R15L) { 621 return true; 622 } 623 return false; 624 } 625 static bool needs_rex_r(const RegName reg) 626 { 627 if (RegName_R8 <= reg && reg <= RegName_R15) { 628 return true; 629 } 630 if (RegName_R8D <= reg && reg <= RegName_R15D) { 631 return true; 632 } 633 if (RegName_R8S <= reg && reg <= RegName_R15S) { 634 return true; 635 } 636 if (RegName_R8L <= reg && reg <= RegName_R15L) { 637 return true; 638 } 639 if (RegName_XMM8 <= reg && reg <= RegName_XMM15) { 640 return true; 641 } 642 if (RegName_XMM8D <= reg && reg <= RegName_XMM15D) { 643 return true; 644 } 645 if (RegName_XMM8S <= reg && reg <= RegName_XMM15S) { 646 return true; 647 } 648 return false; 649 } 650 /** 651 * @brief Returns an 'processor's index' of the register - the index 652 * used to encode the register in ModRM/SIB bytes. 653 * 654 * For the new EM64T registers the 'HW index' differs from the index 655 * encoded in RegName. For old-fashion registers it's effectively the 656 * same as ::getRegIndex(RegName). 657 */ 658 static unsigned char getHWRegIndex(const RegName reg) 659 { 660 if (getRegKind(reg) != OpndKind_GPReg) { 661 return getRegIndex(reg); 662 } 663 if (RegName_SPL <= reg && reg<=RegName_DIL) { 664 return getRegIndex(reg); 665 } 666 if (RegName_R8L<= reg && reg<=RegName_R15L) { 667 return getRegIndex(reg) - getRegIndex(RegName_R8L); 668 } 669 return is_em64t_extra_reg(reg) ? 670 getRegIndex(reg)-getRegIndex(RegName_R8D) : getRegIndex(reg); 671 } 672 #else 673 static unsigned char getHWRegIndex(const RegName reg) 674 { 675 return getRegIndex(reg); 676 } 677 static bool is_em64t_extra_reg(const RegName reg) 678 { 679 return false; 680 } 681 #endif 682 public: 683 static unsigned char get_size_hash(OpndSize size) { 684 return (size <= OpndSize_64) ? size_hash[size] : 0xFF; 685 } 686 static unsigned char get_kind_hash(OpndKind kind) { 687 return (kind <= OpndKind_Mem) ? kind_hash[kind] : 0xFF; 688 } 689 690 /** 691 * @brief A table used for the fast computation of hash value. 692 * 693 * A change must be strictly balanced with hash-related functions and data 694 * in enc_base.h/.cpp. 695 */ 696 static const unsigned char size_hash[OpndSize_64+1]; 697 /** 698 * @brief A table used for the fast computation of hash value. 699 * 700 * A change must be strictly balanced with hash-related functions and data 701 * in enc_base.h/.cpp. 702 */ 703 static const unsigned char kind_hash[OpndKind_Mem+1]; 704 /** 705 * @brief Maximum number of opcodes used for a single mnemonic. 706 * 707 * No arithmetics behind the number, simply estimated. 708 */ 709 static const unsigned int MAX_OPCODES = 32; //20; 710 /** 711 * @brief Mapping between operands hash code and operands. 712 */ 713 static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX]; 714 /** 715 * @brief Array of mnemonics. 716 */ 717 static MnemonicDesc mnemonics[Mnemonic_Count]; 718 /** 719 * @brief Array of available opcodes. 720 */ 721 static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES]; 722 723 static int buildTable(void); 724 static void buildMnemonicDesc(const MnemonicInfo * minfo); 725 /** 726 * @brief Computes hash value for the given operands. 727 */ 728 static unsigned short getHash(const OpcodeInfo* odesc); 729 /** 730 * @brief Dummy variable, for automatic invocation of buildTable() at 731 * startup. 732 */ 733 static int dummy; 734 735 static char * curRelOpnd[3]; 736 }; 737 738 ENCODER_NAMESPACE_END 739 740 #endif // ifndef __ENC_BASE_H_INCLUDED__ 741