1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package x86asm implements decoding of x86 machine code. 6 package x86asm 7 8 import ( 9 "bytes" 10 "fmt" 11 ) 12 13 // An Inst is a single instruction. 14 type Inst struct { 15 Prefix Prefixes // Prefixes applied to the instruction. 16 Op Op // Opcode mnemonic 17 Opcode uint32 // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc) 18 Args Args // Instruction arguments, in Intel order 19 Mode int // processor mode in bits: 16, 32, or 64 20 AddrSize int // address size in bits: 16, 32, or 64 21 DataSize int // operand size in bits: 16, 32, or 64 22 MemBytes int // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on. 23 Len int // length of encoded instruction in bytes 24 } 25 26 // Prefixes is an array of prefixes associated with a single instruction. 27 // The prefixes are listed in the same order as found in the instruction: 28 // each prefix byte corresponds to one slot in the array. The first zero 29 // in the array marks the end of the prefixes. 30 type Prefixes [14]Prefix 31 32 // A Prefix represents an Intel instruction prefix. 33 // The low 8 bits are the actual prefix byte encoding, 34 // and the top 8 bits contain distinguishing bits and metadata. 35 type Prefix uint16 36 37 const ( 38 // Metadata about the role of a prefix in an instruction. 39 PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text 40 PrefixIgnored Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix 41 PrefixInvalid Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK) 42 43 // Memory segment overrides. 44 PrefixES Prefix = 0x26 // ES segment override 45 PrefixCS Prefix = 0x2E // CS segment override 46 PrefixSS Prefix = 0x36 // SS segment override 47 PrefixDS Prefix = 0x3E // DS segment override 48 PrefixFS Prefix = 0x64 // FS segment override 49 PrefixGS Prefix = 0x65 // GS segment override 50 51 // Branch prediction. 52 PrefixPN Prefix = 0x12E // predict not taken (conditional branch only) 53 PrefixPT Prefix = 0x13E // predict taken (conditional branch only) 54 55 // Size attributes. 56 PrefixDataSize Prefix = 0x66 // operand size override 57 PrefixData16 Prefix = 0x166 58 PrefixData32 Prefix = 0x266 59 PrefixAddrSize Prefix = 0x67 // address size override 60 PrefixAddr16 Prefix = 0x167 61 PrefixAddr32 Prefix = 0x267 62 63 // One of a kind. 64 PrefixLOCK Prefix = 0xF0 // lock 65 PrefixREPN Prefix = 0xF2 // repeat not zero 66 PrefixXACQUIRE Prefix = 0x1F2 67 PrefixBND Prefix = 0x2F2 68 PrefixREP Prefix = 0xF3 // repeat 69 PrefixXRELEASE Prefix = 0x1F3 70 71 // The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10). 72 // the other bits are set or not according to the intended use. 73 PrefixREX Prefix = 0x40 // REX 64-bit extension prefix 74 PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width) 75 PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm) 76 PrefixREXX Prefix = 0x02 // extension bit X (index field in sib) 77 PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib) 78 ) 79 80 // IsREX reports whether p is a REX prefix byte. 81 func (p Prefix) IsREX() bool { 82 return p&0xF0 == PrefixREX 83 } 84 85 func (p Prefix) String() string { 86 p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid 87 if s := prefixNames[p]; s != "" { 88 return s 89 } 90 91 if p.IsREX() { 92 s := "REX." 93 if p&PrefixREXW != 0 { 94 s += "W" 95 } 96 if p&PrefixREXR != 0 { 97 s += "R" 98 } 99 if p&PrefixREXX != 0 { 100 s += "X" 101 } 102 if p&PrefixREXB != 0 { 103 s += "B" 104 } 105 return s 106 } 107 108 return fmt.Sprintf("Prefix(%#x)", int(p)) 109 } 110 111 // An Op is an x86 opcode. 112 type Op uint32 113 114 func (op Op) String() string { 115 i := int(op) 116 if i < 0 || i >= len(opNames) || opNames[i] == "" { 117 return fmt.Sprintf("Op(%d)", i) 118 } 119 return opNames[i] 120 } 121 122 // An Args holds the instruction arguments. 123 // If an instruction has fewer than 4 arguments, 124 // the final elements in the array are nil. 125 type Args [4]Arg 126 127 // An Arg is a single instruction argument, 128 // one of these types: Reg, Mem, Imm, Rel. 129 type Arg interface { 130 String() string 131 isArg() 132 } 133 134 // Note that the implements of Arg that follow are all sized 135 // so that on a 64-bit machine the data can be inlined in 136 // the interface value instead of requiring an allocation. 137 138 // A Reg is a single register. 139 // The zero Reg value has no name but indicates ``no register.'' 140 type Reg uint8 141 142 const ( 143 _ Reg = iota 144 145 // 8-bit 146 AL 147 CL 148 DL 149 BL 150 AH 151 CH 152 DH 153 BH 154 SPB 155 BPB 156 SIB 157 DIB 158 R8B 159 R9B 160 R10B 161 R11B 162 R12B 163 R13B 164 R14B 165 R15B 166 167 // 16-bit 168 AX 169 CX 170 DX 171 BX 172 SP 173 BP 174 SI 175 DI 176 R8W 177 R9W 178 R10W 179 R11W 180 R12W 181 R13W 182 R14W 183 R15W 184 185 // 32-bit 186 EAX 187 ECX 188 EDX 189 EBX 190 ESP 191 EBP 192 ESI 193 EDI 194 R8L 195 R9L 196 R10L 197 R11L 198 R12L 199 R13L 200 R14L 201 R15L 202 203 // 64-bit 204 RAX 205 RCX 206 RDX 207 RBX 208 RSP 209 RBP 210 RSI 211 RDI 212 R8 213 R9 214 R10 215 R11 216 R12 217 R13 218 R14 219 R15 220 221 // Instruction pointer. 222 IP // 16-bit 223 EIP // 32-bit 224 RIP // 64-bit 225 226 // 387 floating point registers. 227 F0 228 F1 229 F2 230 F3 231 F4 232 F5 233 F6 234 F7 235 236 // MMX registers. 237 M0 238 M1 239 M2 240 M3 241 M4 242 M5 243 M6 244 M7 245 246 // XMM registers. 247 X0 248 X1 249 X2 250 X3 251 X4 252 X5 253 X6 254 X7 255 X8 256 X9 257 X10 258 X11 259 X12 260 X13 261 X14 262 X15 263 264 // Segment registers. 265 ES 266 CS 267 SS 268 DS 269 FS 270 GS 271 272 // System registers. 273 GDTR 274 IDTR 275 LDTR 276 MSW 277 TASK 278 279 // Control registers. 280 CR0 281 CR1 282 CR2 283 CR3 284 CR4 285 CR5 286 CR6 287 CR7 288 CR8 289 CR9 290 CR10 291 CR11 292 CR12 293 CR13 294 CR14 295 CR15 296 297 // Debug registers. 298 DR0 299 DR1 300 DR2 301 DR3 302 DR4 303 DR5 304 DR6 305 DR7 306 DR8 307 DR9 308 DR10 309 DR11 310 DR12 311 DR13 312 DR14 313 DR15 314 315 // Task registers. 316 TR0 317 TR1 318 TR2 319 TR3 320 TR4 321 TR5 322 TR6 323 TR7 324 ) 325 326 const regMax = TR7 327 328 func (Reg) isArg() {} 329 330 func (r Reg) String() string { 331 i := int(r) 332 if i < 0 || i >= len(regNames) || regNames[i] == "" { 333 return fmt.Sprintf("Reg(%d)", i) 334 } 335 return regNames[i] 336 } 337 338 // A Mem is a memory reference. 339 // The general form is Segment:[Base+Scale*Index+Disp]. 340 type Mem struct { 341 Segment Reg 342 Base Reg 343 Scale uint8 344 Index Reg 345 Disp int64 346 } 347 348 func (Mem) isArg() {} 349 350 func (m Mem) String() string { 351 var base, plus, scale, index, disp string 352 353 if m.Base != 0 { 354 base = m.Base.String() 355 } 356 if m.Scale != 0 { 357 if m.Base != 0 { 358 plus = "+" 359 } 360 if m.Scale > 1 { 361 scale = fmt.Sprintf("%d*", m.Scale) 362 } 363 index = m.Index.String() 364 } 365 if m.Disp != 0 || m.Base == 0 && m.Scale == 0 { 366 disp = fmt.Sprintf("%+#x", m.Disp) 367 } 368 return "[" + base + plus + scale + index + disp + "]" 369 } 370 371 // A Rel is an offset relative to the current instruction pointer. 372 type Rel int32 373 374 func (Rel) isArg() {} 375 376 func (r Rel) String() string { 377 return fmt.Sprintf(".%+d", r) 378 } 379 380 // An Imm is an integer constant. 381 type Imm int64 382 383 func (Imm) isArg() {} 384 385 func (i Imm) String() string { 386 return fmt.Sprintf("%#x", int64(i)) 387 } 388 389 func (i Inst) String() string { 390 var buf bytes.Buffer 391 for _, p := range i.Prefix { 392 if p == 0 { 393 break 394 } 395 if p&PrefixImplicit != 0 { 396 continue 397 } 398 fmt.Fprintf(&buf, "%v ", p) 399 } 400 fmt.Fprintf(&buf, "%v", i.Op) 401 sep := " " 402 for _, v := range i.Args { 403 if v == nil { 404 break 405 } 406 fmt.Fprintf(&buf, "%s%v", sep, v) 407 sep = ", " 408 } 409 return buf.String() 410 } 411 412 func isReg(a Arg) bool { 413 _, ok := a.(Reg) 414 return ok 415 } 416 417 func isSegReg(a Arg) bool { 418 r, ok := a.(Reg) 419 return ok && ES <= r && r <= GS 420 } 421 422 func isMem(a Arg) bool { 423 _, ok := a.(Mem) 424 return ok 425 } 426 427 func isImm(a Arg) bool { 428 _, ok := a.(Imm) 429 return ok 430 } 431 432 func regBytes(a Arg) int { 433 r, ok := a.(Reg) 434 if !ok { 435 return 0 436 } 437 if AL <= r && r <= R15B { 438 return 1 439 } 440 if AX <= r && r <= R15W { 441 return 2 442 } 443 if EAX <= r && r <= R15L { 444 return 4 445 } 446 if RAX <= r && r <= R15 { 447 return 8 448 } 449 return 0 450 } 451 452 func isSegment(p Prefix) bool { 453 switch p { 454 case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS: 455 return true 456 } 457 return false 458 } 459 460 // The Op definitions and string list are in tables.go. 461 462 var prefixNames = map[Prefix]string{ 463 PrefixCS: "CS", 464 PrefixDS: "DS", 465 PrefixES: "ES", 466 PrefixFS: "FS", 467 PrefixGS: "GS", 468 PrefixSS: "SS", 469 PrefixLOCK: "LOCK", 470 PrefixREP: "REP", 471 PrefixREPN: "REPN", 472 PrefixAddrSize: "ADDRSIZE", 473 PrefixDataSize: "DATASIZE", 474 PrefixAddr16: "ADDR16", 475 PrefixData16: "DATA16", 476 PrefixAddr32: "ADDR32", 477 PrefixData32: "DATA32", 478 PrefixBND: "BND", 479 PrefixXACQUIRE: "XACQUIRE", 480 PrefixXRELEASE: "XRELEASE", 481 PrefixREX: "REX", 482 PrefixPT: "PT", 483 PrefixPN: "PN", 484 } 485 486 var regNames = [...]string{ 487 AL: "AL", 488 CL: "CL", 489 BL: "BL", 490 DL: "DL", 491 AH: "AH", 492 CH: "CH", 493 BH: "BH", 494 DH: "DH", 495 SPB: "SPB", 496 BPB: "BPB", 497 SIB: "SIB", 498 DIB: "DIB", 499 R8B: "R8B", 500 R9B: "R9B", 501 R10B: "R10B", 502 R11B: "R11B", 503 R12B: "R12B", 504 R13B: "R13B", 505 R14B: "R14B", 506 R15B: "R15B", 507 AX: "AX", 508 CX: "CX", 509 BX: "BX", 510 DX: "DX", 511 SP: "SP", 512 BP: "BP", 513 SI: "SI", 514 DI: "DI", 515 R8W: "R8W", 516 R9W: "R9W", 517 R10W: "R10W", 518 R11W: "R11W", 519 R12W: "R12W", 520 R13W: "R13W", 521 R14W: "R14W", 522 R15W: "R15W", 523 EAX: "EAX", 524 ECX: "ECX", 525 EDX: "EDX", 526 EBX: "EBX", 527 ESP: "ESP", 528 EBP: "EBP", 529 ESI: "ESI", 530 EDI: "EDI", 531 R8L: "R8L", 532 R9L: "R9L", 533 R10L: "R10L", 534 R11L: "R11L", 535 R12L: "R12L", 536 R13L: "R13L", 537 R14L: "R14L", 538 R15L: "R15L", 539 RAX: "RAX", 540 RCX: "RCX", 541 RDX: "RDX", 542 RBX: "RBX", 543 RSP: "RSP", 544 RBP: "RBP", 545 RSI: "RSI", 546 RDI: "RDI", 547 R8: "R8", 548 R9: "R9", 549 R10: "R10", 550 R11: "R11", 551 R12: "R12", 552 R13: "R13", 553 R14: "R14", 554 R15: "R15", 555 IP: "IP", 556 EIP: "EIP", 557 RIP: "RIP", 558 F0: "F0", 559 F1: "F1", 560 F2: "F2", 561 F3: "F3", 562 F4: "F4", 563 F5: "F5", 564 F6: "F6", 565 F7: "F7", 566 M0: "M0", 567 M1: "M1", 568 M2: "M2", 569 M3: "M3", 570 M4: "M4", 571 M5: "M5", 572 M6: "M6", 573 M7: "M7", 574 X0: "X0", 575 X1: "X1", 576 X2: "X2", 577 X3: "X3", 578 X4: "X4", 579 X5: "X5", 580 X6: "X6", 581 X7: "X7", 582 X8: "X8", 583 X9: "X9", 584 X10: "X10", 585 X11: "X11", 586 X12: "X12", 587 X13: "X13", 588 X14: "X14", 589 X15: "X15", 590 CS: "CS", 591 SS: "SS", 592 DS: "DS", 593 ES: "ES", 594 FS: "FS", 595 GS: "GS", 596 GDTR: "GDTR", 597 IDTR: "IDTR", 598 LDTR: "LDTR", 599 MSW: "MSW", 600 TASK: "TASK", 601 CR0: "CR0", 602 CR1: "CR1", 603 CR2: "CR2", 604 CR3: "CR3", 605 CR4: "CR4", 606 CR5: "CR5", 607 CR6: "CR6", 608 CR7: "CR7", 609 CR8: "CR8", 610 CR9: "CR9", 611 CR10: "CR10", 612 CR11: "CR11", 613 CR12: "CR12", 614 CR13: "CR13", 615 CR14: "CR14", 616 CR15: "CR15", 617 DR0: "DR0", 618 DR1: "DR1", 619 DR2: "DR2", 620 DR3: "DR3", 621 DR4: "DR4", 622 DR5: "DR5", 623 DR6: "DR6", 624 DR7: "DR7", 625 DR8: "DR8", 626 DR9: "DR9", 627 DR10: "DR10", 628 DR11: "DR11", 629 DR12: "DR12", 630 DR13: "DR13", 631 DR14: "DR14", 632 DR15: "DR15", 633 TR0: "TR0", 634 TR1: "TR1", 635 TR2: "TR2", 636 TR3: "TR3", 637 TR4: "TR4", 638 TR5: "TR5", 639 TR6: "TR6", 640 TR7: "TR7", 641 } 642