1 /*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains common definitions used by both the disassembler and the table 12 * generator. 13 * Documentation for the disassembler can be found in X86Disassembler.h. 14 * 15 *===----------------------------------------------------------------------===*/ 16 17 /* 18 * This header file provides those definitions that need to be shared between 19 * the decoder and the table generator in a C-friendly manner. 20 */ 21 22 #ifndef X86DISASSEMBLERDECODERCOMMON_H 23 #define X86DISASSEMBLERDECODERCOMMON_H 24 25 #include "llvm/Support/DataTypes.h" 26 27 #define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers 28 #define CONTEXTS_SYM x86DisassemblerContexts 29 #define ONEBYTE_SYM x86DisassemblerOneByteOpcodes 30 #define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes 31 #define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes 32 #define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes 33 #define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes 34 #define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes 35 36 #define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers" 37 #define CONTEXTS_STR "x86DisassemblerContexts" 38 #define ONEBYTE_STR "x86DisassemblerOneByteOpcodes" 39 #define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes" 40 #define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes" 41 #define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes" 42 #define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes" 43 #define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes" 44 45 /* 46 * Attributes of an instruction that must be known before the opcode can be 47 * processed correctly. Most of these indicate the presence of particular 48 * prefixes, but ATTR_64BIT is simply an attribute of the decoding context. 49 */ 50 #define ATTRIBUTE_BITS \ 51 ENUM_ENTRY(ATTR_NONE, 0x00) \ 52 ENUM_ENTRY(ATTR_64BIT, 0x01) \ 53 ENUM_ENTRY(ATTR_XS, 0x02) \ 54 ENUM_ENTRY(ATTR_XD, 0x04) \ 55 ENUM_ENTRY(ATTR_REXW, 0x08) \ 56 ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ 57 ENUM_ENTRY(ATTR_VEX, 0x20) \ 58 ENUM_ENTRY(ATTR_VEXL, 0x40) 59 60 #define ENUM_ENTRY(n, v) n = v, 61 enum attributeBits { 62 ATTRIBUTE_BITS 63 ATTR_max 64 }; 65 #undef ENUM_ENTRY 66 67 /* 68 * Combinations of the above attributes that are relevant to instruction 69 * decode. Although other combinations are possible, they can be reduced to 70 * these without affecting the ultimately decoded instruction. 71 */ 72 73 /* Class name Rank Rationale for rank assignment */ 74 #define INSTRUCTION_CONTEXTS \ 75 ENUM_ENTRY(IC, 0, "says nothing about the instruction") \ 76 ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \ 77 "64-bit mode but no more") \ 78 ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \ 79 "operands change width") \ 80 ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \ 81 "but not the operands") \ 82 ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ 83 "but not the operands") \ 84 ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ 85 "change width; overrides IC_OPSIZE") \ 86 ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ 87 ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ 88 "secondary") \ 89 ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ 90 ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \ 91 "opcode") \ 92 ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \ 93 "IC_64BIT_REXW_XS") \ 94 ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \ 95 "else because this changes most " \ 96 "operands' meaning") \ 97 ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \ 98 ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \ 99 ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \ 100 ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \ 101 ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \ 102 ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \ 103 ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \ 104 ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ 105 ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ 106 ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ 107 ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\ 108 ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") 109 110 111 #define ENUM_ENTRY(n, r, d) n, 112 typedef enum { 113 INSTRUCTION_CONTEXTS 114 IC_max 115 } InstructionContext; 116 #undef ENUM_ENTRY 117 118 /* 119 * Opcode types, which determine which decode table to use, both in the Intel 120 * manual and also for the decoder. 121 */ 122 typedef enum { 123 ONEBYTE = 0, 124 TWOBYTE = 1, 125 THREEBYTE_38 = 2, 126 THREEBYTE_3A = 3, 127 THREEBYTE_A6 = 4, 128 THREEBYTE_A7 = 5 129 } OpcodeType; 130 131 /* 132 * The following structs are used for the hierarchical decode table. After 133 * determining the instruction's class (i.e., which IC_* constant applies to 134 * it), the decoder reads the opcode. Some instructions require specific 135 * values of the ModR/M byte, so the ModR/M byte indexes into the final table. 136 * 137 * If a ModR/M byte is not required, "required" is left unset, and the values 138 * for each instructionID are identical. 139 */ 140 141 typedef uint16_t InstrUID; 142 143 /* 144 * ModRMDecisionType - describes the type of ModR/M decision, allowing the 145 * consumer to determine the number of entries in it. 146 * 147 * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded 148 * instruction is the same. 149 * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode 150 * corresponds to one instruction; otherwise, it corresponds to 151 * a different instruction. 152 * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond 153 * to a different instruction. 154 */ 155 156 #define MODRMTYPES \ 157 ENUM_ENTRY(MODRM_ONEENTRY) \ 158 ENUM_ENTRY(MODRM_SPLITRM) \ 159 ENUM_ENTRY(MODRM_FULL) 160 161 #define ENUM_ENTRY(n) n, 162 typedef enum { 163 MODRMTYPES 164 MODRM_max 165 } ModRMDecisionType; 166 #undef ENUM_ENTRY 167 168 /* 169 * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which 170 * instruction each possible value of the ModR/M byte corresponds to. Once 171 * this information is known, we have narrowed down to a single instruction. 172 */ 173 struct ModRMDecision { 174 uint8_t modrm_type; 175 176 /* The macro below must be defined wherever this file is included. */ 177 INSTRUCTION_IDS 178 }; 179 180 /* 181 * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at 182 * given a particular opcode. 183 */ 184 struct OpcodeDecision { 185 struct ModRMDecision modRMDecisions[256]; 186 }; 187 188 /* 189 * ContextDecision - Specifies which opcode->instruction tables to look at given 190 * a particular context (set of attributes). Since there are many possible 191 * contexts, the decoder first uses CONTEXTS_SYM to determine which context 192 * applies given a specific set of attributes. Hence there are only IC_max 193 * entries in this table, rather than 2^(ATTR_max). 194 */ 195 struct ContextDecision { 196 struct OpcodeDecision opcodeDecisions[IC_max]; 197 }; 198 199 /* 200 * Physical encodings of instruction operands. 201 */ 202 203 #define ENCODINGS \ 204 ENUM_ENTRY(ENCODING_NONE, "") \ 205 ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \ 206 ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \ 207 ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \ 208 ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \ 209 ENUM_ENTRY(ENCODING_CW, "2-byte") \ 210 ENUM_ENTRY(ENCODING_CD, "4-byte") \ 211 ENUM_ENTRY(ENCODING_CP, "6-byte") \ 212 ENUM_ENTRY(ENCODING_CO, "8-byte") \ 213 ENUM_ENTRY(ENCODING_CT, "10-byte") \ 214 ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \ 215 ENUM_ENTRY(ENCODING_IW, "2-byte") \ 216 ENUM_ENTRY(ENCODING_ID, "4-byte") \ 217 ENUM_ENTRY(ENCODING_IO, "8-byte") \ 218 ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \ 219 "the opcode byte") \ 220 ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \ 221 ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \ 222 ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \ 223 ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \ 224 "opcode byte") \ 225 \ 226 ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \ 227 ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \ 228 ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \ 229 "opcode byte") \ 230 ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ 231 "in type") 232 233 #define ENUM_ENTRY(n, d) n, 234 typedef enum { 235 ENCODINGS 236 ENCODING_max 237 } OperandEncoding; 238 #undef ENUM_ENTRY 239 240 /* 241 * Semantic interpretations of instruction operands. 242 */ 243 244 #define TYPES \ 245 ENUM_ENTRY(TYPE_NONE, "") \ 246 ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \ 247 ENUM_ENTRY(TYPE_REL16, "2-byte") \ 248 ENUM_ENTRY(TYPE_REL32, "4-byte") \ 249 ENUM_ENTRY(TYPE_REL64, "8-byte") \ 250 ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \ 251 ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \ 252 ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \ 253 ENUM_ENTRY(TYPE_R8, "1-byte register operand") \ 254 ENUM_ENTRY(TYPE_R16, "2-byte") \ 255 ENUM_ENTRY(TYPE_R32, "4-byte") \ 256 ENUM_ENTRY(TYPE_R64, "8-byte") \ 257 ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \ 258 ENUM_ENTRY(TYPE_IMM16, "2-byte") \ 259 ENUM_ENTRY(TYPE_IMM32, "4-byte") \ 260 ENUM_ENTRY(TYPE_IMM64, "8-byte") \ 261 ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ 262 ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ 263 ENUM_ENTRY(TYPE_RM16, "2-byte") \ 264 ENUM_ENTRY(TYPE_RM32, "4-byte") \ 265 ENUM_ENTRY(TYPE_RM64, "8-byte") \ 266 ENUM_ENTRY(TYPE_M, "Memory operand") \ 267 ENUM_ENTRY(TYPE_M8, "1-byte") \ 268 ENUM_ENTRY(TYPE_M16, "2-byte") \ 269 ENUM_ENTRY(TYPE_M32, "4-byte") \ 270 ENUM_ENTRY(TYPE_M64, "8-byte") \ 271 ENUM_ENTRY(TYPE_LEA, "Effective address") \ 272 ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \ 273 ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \ 274 ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \ 275 ENUM_ENTRY(TYPE_M1632, "2+4-byte") \ 276 ENUM_ENTRY(TYPE_M1664, "2+8-byte") \ 277 ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \ 278 ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \ 279 ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \ 280 ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \ 281 ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \ 282 "base)") \ 283 ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \ 284 ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \ 285 ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \ 286 ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \ 287 "2 = SS, 3 = DS, 4 = FS, 5 = GS") \ 288 ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \ 289 ENUM_ENTRY(TYPE_M64FP, "64-bit") \ 290 ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \ 291 ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \ 292 "floating-point instructions") \ 293 ENUM_ENTRY(TYPE_M32INT, "4-byte") \ 294 ENUM_ENTRY(TYPE_M64INT, "8-byte") \ 295 ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \ 296 ENUM_ENTRY(TYPE_MM, "MMX register operand") \ 297 ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \ 298 ENUM_ENTRY(TYPE_MM64, "8-byte") \ 299 ENUM_ENTRY(TYPE_XMM, "XMM register operand") \ 300 ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \ 301 ENUM_ENTRY(TYPE_XMM64, "8-byte") \ 302 ENUM_ENTRY(TYPE_XMM128, "16-byte") \ 303 ENUM_ENTRY(TYPE_XMM256, "32-byte") \ 304 ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ 305 ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ 306 ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ 307 ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ 308 \ 309 ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \ 310 ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ 311 ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \ 312 ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \ 313 ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \ 314 ENUM_ENTRY(TYPE_DUP1, "operand 1") \ 315 ENUM_ENTRY(TYPE_DUP2, "operand 2") \ 316 ENUM_ENTRY(TYPE_DUP3, "operand 3") \ 317 ENUM_ENTRY(TYPE_DUP4, "operand 4") \ 318 ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state") 319 320 #define ENUM_ENTRY(n, d) n, 321 typedef enum { 322 TYPES 323 TYPE_max 324 } OperandType; 325 #undef ENUM_ENTRY 326 327 /* 328 * OperandSpecifier - The specification for how to extract and interpret one 329 * operand. 330 */ 331 struct OperandSpecifier { 332 OperandEncoding encoding; 333 OperandType type; 334 }; 335 336 /* 337 * Indicates where the opcode modifier (if any) is to be found. Extended 338 * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte. 339 */ 340 341 #define MODIFIER_TYPES \ 342 ENUM_ENTRY(MODIFIER_NONE) \ 343 ENUM_ENTRY(MODIFIER_OPCODE) \ 344 ENUM_ENTRY(MODIFIER_MODRM) 345 346 #define ENUM_ENTRY(n) n, 347 typedef enum { 348 MODIFIER_TYPES 349 MODIFIER_max 350 } ModifierType; 351 #undef ENUM_ENTRY 352 353 #define X86_MAX_OPERANDS 5 354 355 /* 356 * The specification for how to extract and interpret a full instruction and 357 * its operands. 358 */ 359 struct InstructionSpecifier { 360 ModifierType modifierType; 361 uint8_t modifierBase; 362 struct OperandSpecifier operands[X86_MAX_OPERANDS]; 363 364 /* The macro below must be defined wherever this file is included. */ 365 INSTRUCTION_SPECIFIER_FIELDS 366 }; 367 368 /* 369 * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode 370 * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, 371 * respectively. 372 */ 373 typedef enum { 374 MODE_16BIT, 375 MODE_32BIT, 376 MODE_64BIT 377 } DisassemblerMode; 378 379 #endif 380