1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package x86asm implements decoding of x86 machine code. 6 package x86asm 7 8 import ( 9 "bytes" 10 "fmt" 11 ) 12 13 // An Inst is a single instruction. 14 type Inst struct { 15 Prefix Prefixes // Prefixes applied to the instruction. 16 Op Op // Opcode mnemonic 17 Opcode uint32 // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc) 18 Args Args // Instruction arguments, in Intel order 19 Mode int // processor mode in bits: 16, 32, or 64 20 AddrSize int // address size in bits: 16, 32, or 64 21 DataSize int // operand size in bits: 16, 32, or 64 22 MemBytes int // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on. 23 Len int // length of encoded instruction in bytes 24 PCRel int // length of PC-relative address in instruction encoding 25 PCRelOff int // index of start of PC-relative address in instruction encoding 26 } 27 28 // Prefixes is an array of prefixes associated with a single instruction. 29 // The prefixes are listed in the same order as found in the instruction: 30 // each prefix byte corresponds to one slot in the array. The first zero 31 // in the array marks the end of the prefixes. 32 type Prefixes [14]Prefix 33 34 // A Prefix represents an Intel instruction prefix. 35 // The low 8 bits are the actual prefix byte encoding, 36 // and the top 8 bits contain distinguishing bits and metadata. 37 type Prefix uint16 38 39 const ( 40 // Metadata about the role of a prefix in an instruction. 41 PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text 42 PrefixIgnored Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix 43 PrefixInvalid Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK) 44 45 // Memory segment overrides. 46 PrefixES Prefix = 0x26 // ES segment override 47 PrefixCS Prefix = 0x2E // CS segment override 48 PrefixSS Prefix = 0x36 // SS segment override 49 PrefixDS Prefix = 0x3E // DS segment override 50 PrefixFS Prefix = 0x64 // FS segment override 51 PrefixGS Prefix = 0x65 // GS segment override 52 53 // Branch prediction. 54 PrefixPN Prefix = 0x12E // predict not taken (conditional branch only) 55 PrefixPT Prefix = 0x13E // predict taken (conditional branch only) 56 57 // Size attributes. 58 PrefixDataSize Prefix = 0x66 // operand size override 59 PrefixData16 Prefix = 0x166 60 PrefixData32 Prefix = 0x266 61 PrefixAddrSize Prefix = 0x67 // address size override 62 PrefixAddr16 Prefix = 0x167 63 PrefixAddr32 Prefix = 0x267 64 65 // One of a kind. 66 PrefixLOCK Prefix = 0xF0 // lock 67 PrefixREPN Prefix = 0xF2 // repeat not zero 68 PrefixXACQUIRE Prefix = 0x1F2 69 PrefixBND Prefix = 0x2F2 70 PrefixREP Prefix = 0xF3 // repeat 71 PrefixXRELEASE Prefix = 0x1F3 72 73 // The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10). 74 // the other bits are set or not according to the intended use. 75 PrefixREX Prefix = 0x40 // REX 64-bit extension prefix 76 PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width) 77 PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm) 78 PrefixREXX Prefix = 0x02 // extension bit X (index field in sib) 79 PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib) 80 ) 81 82 // IsREX reports whether p is a REX prefix byte. 83 func (p Prefix) IsREX() bool { 84 return p&0xF0 == PrefixREX 85 } 86 87 func (p Prefix) String() string { 88 p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid 89 if s := prefixNames[p]; s != "" { 90 return s 91 } 92 93 if p.IsREX() { 94 s := "REX." 95 if p&PrefixREXW != 0 { 96 s += "W" 97 } 98 if p&PrefixREXR != 0 { 99 s += "R" 100 } 101 if p&PrefixREXX != 0 { 102 s += "X" 103 } 104 if p&PrefixREXB != 0 { 105 s += "B" 106 } 107 return s 108 } 109 110 return fmt.Sprintf("Prefix(%#x)", int(p)) 111 } 112 113 // An Op is an x86 opcode. 114 type Op uint32 115 116 func (op Op) String() string { 117 i := int(op) 118 if i < 0 || i >= len(opNames) || opNames[i] == "" { 119 return fmt.Sprintf("Op(%d)", i) 120 } 121 return opNames[i] 122 } 123 124 // An Args holds the instruction arguments. 125 // If an instruction has fewer than 4 arguments, 126 // the final elements in the array are nil. 127 type Args [4]Arg 128 129 // An Arg is a single instruction argument, 130 // one of these types: Reg, Mem, Imm, Rel. 131 type Arg interface { 132 String() string 133 isArg() 134 } 135 136 // Note that the implements of Arg that follow are all sized 137 // so that on a 64-bit machine the data can be inlined in 138 // the interface value instead of requiring an allocation. 139 140 // A Reg is a single register. 141 // The zero Reg value has no name but indicates ``no register.'' 142 type Reg uint8 143 144 const ( 145 _ Reg = iota 146 147 // 8-bit 148 AL 149 CL 150 DL 151 BL 152 AH 153 CH 154 DH 155 BH 156 SPB 157 BPB 158 SIB 159 DIB 160 R8B 161 R9B 162 R10B 163 R11B 164 R12B 165 R13B 166 R14B 167 R15B 168 169 // 16-bit 170 AX 171 CX 172 DX 173 BX 174 SP 175 BP 176 SI 177 DI 178 R8W 179 R9W 180 R10W 181 R11W 182 R12W 183 R13W 184 R14W 185 R15W 186 187 // 32-bit 188 EAX 189 ECX 190 EDX 191 EBX 192 ESP 193 EBP 194 ESI 195 EDI 196 R8L 197 R9L 198 R10L 199 R11L 200 R12L 201 R13L 202 R14L 203 R15L 204 205 // 64-bit 206 RAX 207 RCX 208 RDX 209 RBX 210 RSP 211 RBP 212 RSI 213 RDI 214 R8 215 R9 216 R10 217 R11 218 R12 219 R13 220 R14 221 R15 222 223 // Instruction pointer. 224 IP // 16-bit 225 EIP // 32-bit 226 RIP // 64-bit 227 228 // 387 floating point registers. 229 F0 230 F1 231 F2 232 F3 233 F4 234 F5 235 F6 236 F7 237 238 // MMX registers. 239 M0 240 M1 241 M2 242 M3 243 M4 244 M5 245 M6 246 M7 247 248 // XMM registers. 249 X0 250 X1 251 X2 252 X3 253 X4 254 X5 255 X6 256 X7 257 X8 258 X9 259 X10 260 X11 261 X12 262 X13 263 X14 264 X15 265 266 // Segment registers. 267 ES 268 CS 269 SS 270 DS 271 FS 272 GS 273 274 // System registers. 275 GDTR 276 IDTR 277 LDTR 278 MSW 279 TASK 280 281 // Control registers. 282 CR0 283 CR1 284 CR2 285 CR3 286 CR4 287 CR5 288 CR6 289 CR7 290 CR8 291 CR9 292 CR10 293 CR11 294 CR12 295 CR13 296 CR14 297 CR15 298 299 // Debug registers. 300 DR0 301 DR1 302 DR2 303 DR3 304 DR4 305 DR5 306 DR6 307 DR7 308 DR8 309 DR9 310 DR10 311 DR11 312 DR12 313 DR13 314 DR14 315 DR15 316 317 // Task registers. 318 TR0 319 TR1 320 TR2 321 TR3 322 TR4 323 TR5 324 TR6 325 TR7 326 ) 327 328 const regMax = TR7 329 330 func (Reg) isArg() {} 331 332 func (r Reg) String() string { 333 i := int(r) 334 if i < 0 || i >= len(regNames) || regNames[i] == "" { 335 return fmt.Sprintf("Reg(%d)", i) 336 } 337 return regNames[i] 338 } 339 340 // A Mem is a memory reference. 341 // The general form is Segment:[Base+Scale*Index+Disp]. 342 type Mem struct { 343 Segment Reg 344 Base Reg 345 Scale uint8 346 Index Reg 347 Disp int64 348 } 349 350 func (Mem) isArg() {} 351 352 func (m Mem) String() string { 353 var base, plus, scale, index, disp string 354 355 if m.Base != 0 { 356 base = m.Base.String() 357 } 358 if m.Scale != 0 { 359 if m.Base != 0 { 360 plus = "+" 361 } 362 if m.Scale > 1 { 363 scale = fmt.Sprintf("%d*", m.Scale) 364 } 365 index = m.Index.String() 366 } 367 if m.Disp != 0 || m.Base == 0 && m.Scale == 0 { 368 disp = fmt.Sprintf("%+#x", m.Disp) 369 } 370 return "[" + base + plus + scale + index + disp + "]" 371 } 372 373 // A Rel is an offset relative to the current instruction pointer. 374 type Rel int32 375 376 func (Rel) isArg() {} 377 378 func (r Rel) String() string { 379 return fmt.Sprintf(".%+d", r) 380 } 381 382 // An Imm is an integer constant. 383 type Imm int64 384 385 func (Imm) isArg() {} 386 387 func (i Imm) String() string { 388 return fmt.Sprintf("%#x", int64(i)) 389 } 390 391 func (i Inst) String() string { 392 var buf bytes.Buffer 393 for _, p := range i.Prefix { 394 if p == 0 { 395 break 396 } 397 if p&PrefixImplicit != 0 { 398 continue 399 } 400 fmt.Fprintf(&buf, "%v ", p) 401 } 402 fmt.Fprintf(&buf, "%v", i.Op) 403 sep := " " 404 for _, v := range i.Args { 405 if v == nil { 406 break 407 } 408 fmt.Fprintf(&buf, "%s%v", sep, v) 409 sep = ", " 410 } 411 return buf.String() 412 } 413 414 func isReg(a Arg) bool { 415 _, ok := a.(Reg) 416 return ok 417 } 418 419 func isSegReg(a Arg) bool { 420 r, ok := a.(Reg) 421 return ok && ES <= r && r <= GS 422 } 423 424 func isMem(a Arg) bool { 425 _, ok := a.(Mem) 426 return ok 427 } 428 429 func isImm(a Arg) bool { 430 _, ok := a.(Imm) 431 return ok 432 } 433 434 func regBytes(a Arg) int { 435 r, ok := a.(Reg) 436 if !ok { 437 return 0 438 } 439 if AL <= r && r <= R15B { 440 return 1 441 } 442 if AX <= r && r <= R15W { 443 return 2 444 } 445 if EAX <= r && r <= R15L { 446 return 4 447 } 448 if RAX <= r && r <= R15 { 449 return 8 450 } 451 return 0 452 } 453 454 func isSegment(p Prefix) bool { 455 switch p { 456 case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS: 457 return true 458 } 459 return false 460 } 461 462 // The Op definitions and string list are in tables.go. 463 464 var prefixNames = map[Prefix]string{ 465 PrefixCS: "CS", 466 PrefixDS: "DS", 467 PrefixES: "ES", 468 PrefixFS: "FS", 469 PrefixGS: "GS", 470 PrefixSS: "SS", 471 PrefixLOCK: "LOCK", 472 PrefixREP: "REP", 473 PrefixREPN: "REPN", 474 PrefixAddrSize: "ADDRSIZE", 475 PrefixDataSize: "DATASIZE", 476 PrefixAddr16: "ADDR16", 477 PrefixData16: "DATA16", 478 PrefixAddr32: "ADDR32", 479 PrefixData32: "DATA32", 480 PrefixBND: "BND", 481 PrefixXACQUIRE: "XACQUIRE", 482 PrefixXRELEASE: "XRELEASE", 483 PrefixREX: "REX", 484 PrefixPT: "PT", 485 PrefixPN: "PN", 486 } 487 488 var regNames = [...]string{ 489 AL: "AL", 490 CL: "CL", 491 BL: "BL", 492 DL: "DL", 493 AH: "AH", 494 CH: "CH", 495 BH: "BH", 496 DH: "DH", 497 SPB: "SPB", 498 BPB: "BPB", 499 SIB: "SIB", 500 DIB: "DIB", 501 R8B: "R8B", 502 R9B: "R9B", 503 R10B: "R10B", 504 R11B: "R11B", 505 R12B: "R12B", 506 R13B: "R13B", 507 R14B: "R14B", 508 R15B: "R15B", 509 AX: "AX", 510 CX: "CX", 511 BX: "BX", 512 DX: "DX", 513 SP: "SP", 514 BP: "BP", 515 SI: "SI", 516 DI: "DI", 517 R8W: "R8W", 518 R9W: "R9W", 519 R10W: "R10W", 520 R11W: "R11W", 521 R12W: "R12W", 522 R13W: "R13W", 523 R14W: "R14W", 524 R15W: "R15W", 525 EAX: "EAX", 526 ECX: "ECX", 527 EDX: "EDX", 528 EBX: "EBX", 529 ESP: "ESP", 530 EBP: "EBP", 531 ESI: "ESI", 532 EDI: "EDI", 533 R8L: "R8L", 534 R9L: "R9L", 535 R10L: "R10L", 536 R11L: "R11L", 537 R12L: "R12L", 538 R13L: "R13L", 539 R14L: "R14L", 540 R15L: "R15L", 541 RAX: "RAX", 542 RCX: "RCX", 543 RDX: "RDX", 544 RBX: "RBX", 545 RSP: "RSP", 546 RBP: "RBP", 547 RSI: "RSI", 548 RDI: "RDI", 549 R8: "R8", 550 R9: "R9", 551 R10: "R10", 552 R11: "R11", 553 R12: "R12", 554 R13: "R13", 555 R14: "R14", 556 R15: "R15", 557 IP: "IP", 558 EIP: "EIP", 559 RIP: "RIP", 560 F0: "F0", 561 F1: "F1", 562 F2: "F2", 563 F3: "F3", 564 F4: "F4", 565 F5: "F5", 566 F6: "F6", 567 F7: "F7", 568 M0: "M0", 569 M1: "M1", 570 M2: "M2", 571 M3: "M3", 572 M4: "M4", 573 M5: "M5", 574 M6: "M6", 575 M7: "M7", 576 X0: "X0", 577 X1: "X1", 578 X2: "X2", 579 X3: "X3", 580 X4: "X4", 581 X5: "X5", 582 X6: "X6", 583 X7: "X7", 584 X8: "X8", 585 X9: "X9", 586 X10: "X10", 587 X11: "X11", 588 X12: "X12", 589 X13: "X13", 590 X14: "X14", 591 X15: "X15", 592 CS: "CS", 593 SS: "SS", 594 DS: "DS", 595 ES: "ES", 596 FS: "FS", 597 GS: "GS", 598 GDTR: "GDTR", 599 IDTR: "IDTR", 600 LDTR: "LDTR", 601 MSW: "MSW", 602 TASK: "TASK", 603 CR0: "CR0", 604 CR1: "CR1", 605 CR2: "CR2", 606 CR3: "CR3", 607 CR4: "CR4", 608 CR5: "CR5", 609 CR6: "CR6", 610 CR7: "CR7", 611 CR8: "CR8", 612 CR9: "CR9", 613 CR10: "CR10", 614 CR11: "CR11", 615 CR12: "CR12", 616 CR13: "CR13", 617 CR14: "CR14", 618 CR15: "CR15", 619 DR0: "DR0", 620 DR1: "DR1", 621 DR2: "DR2", 622 DR3: "DR3", 623 DR4: "DR4", 624 DR5: "DR5", 625 DR6: "DR6", 626 DR7: "DR7", 627 DR8: "DR8", 628 DR9: "DR9", 629 DR10: "DR10", 630 DR11: "DR11", 631 DR12: "DR12", 632 DR13: "DR13", 633 DR14: "DR14", 634 DR15: "DR15", 635 TR0: "TR0", 636 TR1: "TR1", 637 TR2: "TR2", 638 TR3: "TR3", 639 TR4: "TR4", 640 TR5: "TR5", 641 TR6: "TR6", 642 TR7: "TR7", 643 } 644