Home | History | Annotate | Download | only in x86asm
      1 // Copyright 2014 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package x86asm implements decoding of x86 machine code.
      6 package x86asm
      7 
      8 import (
      9 	"bytes"
     10 	"fmt"
     11 )
     12 
// An Inst is a single instruction.
//
// Prefix and Args are fixed-size arrays: unused trailing Prefix slots
// are zero and unused trailing Args slots are nil. Inst.String stops at
// the first zero prefix and at the first nil argument.
type Inst struct {
	Prefix   Prefixes // Prefixes applied to the instruction.
	Op       Op       // Opcode mnemonic
	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
	Args     Args     // Instruction arguments, in Intel order
	Mode     int      // processor mode in bits: 16, 32, or 64
	AddrSize int      // address size in bits: 16, 32, or 64
	DataSize int      // operand size in bits: 16, 32, or 64
	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
	Len      int      // length of encoded instruction in bytes
	PCRel    int      // length of PC-relative address in instruction encoding
	PCRelOff int      // index of start of PC-relative address in instruction encoding
}
     27 
// Prefixes is an array of prefixes associated with a single instruction.
// The prefixes are listed in the same order as found in the instruction:
// each prefix byte corresponds to one slot in the array. The first zero
// in the array marks the end of the prefixes; slots after it are not
// examined by Inst.String.
type Prefixes [14]Prefix
     33 
// A Prefix represents an Intel instruction prefix.
// The low 8 bits are the actual prefix byte encoding,
// and the top 8 bits contain distinguishing bits and metadata.
//
// The metadata bits are PrefixImplicit, PrefixIgnored, and PrefixInvalid.
// The distinguishing bits (0x100, 0x200) separate prefixes that share
// one encoded byte, such as PrefixREPN (0xF2) and PrefixXACQUIRE (0x1F2).
type Prefix uint16
     38 
// Prefix values. Constants with bits set above the low byte share their
// encoded byte (the low 8 bits) with another prefix and differ from it
// only in those distinguishing bits.
const (
	// Metadata about the role of a prefix in an instruction.
	// Prefix.String clears these three bits before looking up a name.
	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)

	// Memory segment overrides.
	PrefixES Prefix = 0x26 // ES segment override
	PrefixCS Prefix = 0x2E // CS segment override
	PrefixSS Prefix = 0x36 // SS segment override
	PrefixDS Prefix = 0x3E // DS segment override
	PrefixFS Prefix = 0x64 // FS segment override
	PrefixGS Prefix = 0x65 // GS segment override

	// Branch prediction.
	// These use the same low byte as PrefixCS (0x2E) and PrefixDS (0x3E).
	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)

	// Size attributes.
	// The 16/32 variants use the same low byte as the plain override.
	PrefixDataSize Prefix = 0x66 // operand size override
	PrefixData16   Prefix = 0x166
	PrefixData32   Prefix = 0x266
	PrefixAddrSize Prefix = 0x67 // address size override
	PrefixAddr16   Prefix = 0x167
	PrefixAddr32   Prefix = 0x267

	// One of a kind.
	// XACQUIRE and BND use the same low byte as REPN (0xF2);
	// XRELEASE uses the same low byte as REP (0xF3).
	PrefixLOCK     Prefix = 0xF0 // lock
	PrefixREPN     Prefix = 0xF2 // repeat not zero
	PrefixXACQUIRE Prefix = 0x1F2
	PrefixBND      Prefix = 0x2F2
	PrefixREP      Prefix = 0xF3 // repeat
	PrefixXRELEASE Prefix = 0x1F3

	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
	// The other bits are set or not according to the intended use.
	PrefixREX  Prefix = 0x40 // REX 64-bit extension prefix
	PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
	PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
	PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
	PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
)
     81 
     82 // IsREX reports whether p is a REX prefix byte.
     83 func (p Prefix) IsREX() bool {
     84 	return p&0xF0 == PrefixREX
     85 }
     86 
     87 func (p Prefix) String() string {
     88 	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
     89 	if s := prefixNames[p]; s != "" {
     90 		return s
     91 	}
     92 
     93 	if p.IsREX() {
     94 		s := "REX."
     95 		if p&PrefixREXW != 0 {
     96 			s += "W"
     97 		}
     98 		if p&PrefixREXR != 0 {
     99 			s += "R"
    100 		}
    101 		if p&PrefixREXX != 0 {
    102 			s += "X"
    103 		}
    104 		if p&PrefixREXB != 0 {
    105 			s += "B"
    106 		}
    107 		return s
    108 	}
    109 
    110 	return fmt.Sprintf("Prefix(%#x)", int(p))
    111 }
    112 
    113 // An Op is an x86 opcode.
    114 type Op uint32
    115 
    116 func (op Op) String() string {
    117 	i := int(op)
    118 	if i < 0 || i >= len(opNames) || opNames[i] == "" {
    119 		return fmt.Sprintf("Op(%d)", i)
    120 	}
    121 	return opNames[i]
    122 }
    123 
// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
// Inst.String prints arguments up to, but not including, the first nil.
type Args [4]Arg
    128 
// An Arg is a single instruction argument,
// one of these types: Reg, Mem, Imm, Rel.
type Arg interface {
	// String returns the textual form of the argument.
	String() string
	// isArg is an unexported marker method. Because it cannot be
	// declared outside this package, only types defined here can
	// implement Arg.
	isArg()
}
    135 
// Note that the implementations of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.
    139 
// A Reg is a single register.
// The zero Reg value has no name but indicates "no register."
type Reg uint8
    143 
// Register numbers.
//
// Values are assigned consecutively by iota, starting at 1 for AL; the
// value 0 is left unnamed. Helpers in this file (regBytes, isSegReg)
// classify a register by comparing it against the first and last
// constant of a group, so the order of the declarations below is
// significant and each group must stay contiguous.
const (
	_ Reg = iota

	// 8-bit
	AL
	CL
	DL
	BL
	AH
	CH
	DH
	BH
	SPB
	BPB
	SIB
	DIB
	R8B
	R9B
	R10B
	R11B
	R12B
	R13B
	R14B
	R15B

	// 16-bit
	AX
	CX
	DX
	BX
	SP
	BP
	SI
	DI
	R8W
	R9W
	R10W
	R11W
	R12W
	R13W
	R14W
	R15W

	// 32-bit
	EAX
	ECX
	EDX
	EBX
	ESP
	EBP
	ESI
	EDI
	R8L
	R9L
	R10L
	R11L
	R12L
	R13L
	R14L
	R15L

	// 64-bit
	RAX
	RCX
	RDX
	RBX
	RSP
	RBP
	RSI
	RDI
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	// Instruction pointer.
	IP  // 16-bit
	EIP // 32-bit
	RIP // 64-bit

	// 387 floating point registers.
	F0
	F1
	F2
	F3
	F4
	F5
	F6
	F7

	// MMX registers.
	M0
	M1
	M2
	M3
	M4
	M5
	M6
	M7

	// XMM registers.
	X0
	X1
	X2
	X3
	X4
	X5
	X6
	X7
	X8
	X9
	X10
	X11
	X12
	X13
	X14
	X15

	// Segment registers.
	// isSegReg treats the range ES..GS as the segment registers.
	ES
	CS
	SS
	DS
	FS
	GS

	// System registers.
	GDTR
	IDTR
	LDTR
	MSW
	TASK

	// Control registers.
	CR0
	CR1
	CR2
	CR3
	CR4
	CR5
	CR6
	CR7
	CR8
	CR9
	CR10
	CR11
	CR12
	CR13
	CR14
	CR15

	// Debug registers.
	DR0
	DR1
	DR2
	DR3
	DR4
	DR5
	DR6
	DR7
	DR8
	DR9
	DR10
	DR11
	DR12
	DR13
	DR14
	DR15

	// Task registers.
	TR0
	TR1
	TR2
	TR3
	TR4
	TR5
	TR6
	TR7
)
    327 
// regMax is the last, and therefore highest-valued, register constant.
const regMax = TR7
    329 
    330 func (Reg) isArg() {}
    331 
    332 func (r Reg) String() string {
    333 	i := int(r)
    334 	if i < 0 || i >= len(regNames) || regNames[i] == "" {
    335 		return fmt.Sprintf("Reg(%d)", i)
    336 	}
    337 	return regNames[i]
    338 }
    339 
    340 // A Mem is a memory reference.
    341 // The general form is Segment:[Base+Scale*Index+Disp].
    342 type Mem struct {
    343 	Segment Reg
    344 	Base    Reg
    345 	Scale   uint8
    346 	Index   Reg
    347 	Disp    int64
    348 }
    349 
    350 func (Mem) isArg() {}
    351 
    352 func (m Mem) String() string {
    353 	var base, plus, scale, index, disp string
    354 
    355 	if m.Base != 0 {
    356 		base = m.Base.String()
    357 	}
    358 	if m.Scale != 0 {
    359 		if m.Base != 0 {
    360 			plus = "+"
    361 		}
    362 		if m.Scale > 1 {
    363 			scale = fmt.Sprintf("%d*", m.Scale)
    364 		}
    365 		index = m.Index.String()
    366 	}
    367 	if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
    368 		disp = fmt.Sprintf("%+#x", m.Disp)
    369 	}
    370 	return "[" + base + plus + scale + index + disp + "]"
    371 }
    372 
    373 // A Rel is an offset relative to the current instruction pointer.
    374 type Rel int32
    375 
    376 func (Rel) isArg() {}
    377 
    378 func (r Rel) String() string {
    379 	return fmt.Sprintf(".%+d", r)
    380 }
    381 
    382 // An Imm is an integer constant.
    383 type Imm int64
    384 
    385 func (Imm) isArg() {}
    386 
    387 func (i Imm) String() string {
    388 	return fmt.Sprintf("%#x", int64(i))
    389 }
    390 
    391 func (i Inst) String() string {
    392 	var buf bytes.Buffer
    393 	for _, p := range i.Prefix {
    394 		if p == 0 {
    395 			break
    396 		}
    397 		if p&PrefixImplicit != 0 {
    398 			continue
    399 		}
    400 		fmt.Fprintf(&buf, "%v ", p)
    401 	}
    402 	fmt.Fprintf(&buf, "%v", i.Op)
    403 	sep := " "
    404 	for _, v := range i.Args {
    405 		if v == nil {
    406 			break
    407 		}
    408 		fmt.Fprintf(&buf, "%s%v", sep, v)
    409 		sep = ", "
    410 	}
    411 	return buf.String()
    412 }
    413 
    414 func isReg(a Arg) bool {
    415 	_, ok := a.(Reg)
    416 	return ok
    417 }
    418 
    419 func isSegReg(a Arg) bool {
    420 	r, ok := a.(Reg)
    421 	return ok && ES <= r && r <= GS
    422 }
    423 
    424 func isMem(a Arg) bool {
    425 	_, ok := a.(Mem)
    426 	return ok
    427 }
    428 
    429 func isImm(a Arg) bool {
    430 	_, ok := a.(Imm)
    431 	return ok
    432 }
    433 
    434 func regBytes(a Arg) int {
    435 	r, ok := a.(Reg)
    436 	if !ok {
    437 		return 0
    438 	}
    439 	if AL <= r && r <= R15B {
    440 		return 1
    441 	}
    442 	if AX <= r && r <= R15W {
    443 		return 2
    444 	}
    445 	if EAX <= r && r <= R15L {
    446 		return 4
    447 	}
    448 	if RAX <= r && r <= R15 {
    449 		return 8
    450 	}
    451 	return 0
    452 }
    453 
    454 func isSegment(p Prefix) bool {
    455 	switch p {
    456 	case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
    457 		return true
    458 	}
    459 	return false
    460 }
    461 
    462 // The Op definitions and string list are in tables.go.
    463 
    464 var prefixNames = map[Prefix]string{
    465 	PrefixCS:       "CS",
    466 	PrefixDS:       "DS",
    467 	PrefixES:       "ES",
    468 	PrefixFS:       "FS",
    469 	PrefixGS:       "GS",
    470 	PrefixSS:       "SS",
    471 	PrefixLOCK:     "LOCK",
    472 	PrefixREP:      "REP",
    473 	PrefixREPN:     "REPN",
    474 	PrefixAddrSize: "ADDRSIZE",
    475 	PrefixDataSize: "DATASIZE",
    476 	PrefixAddr16:   "ADDR16",
    477 	PrefixData16:   "DATA16",
    478 	PrefixAddr32:   "ADDR32",
    479 	PrefixData32:   "DATA32",
    480 	PrefixBND:      "BND",
    481 	PrefixXACQUIRE: "XACQUIRE",
    482 	PrefixXRELEASE: "XRELEASE",
    483 	PrefixREX:      "REX",
    484 	PrefixPT:       "PT",
    485 	PrefixPN:       "PN",
    486 }
    487 
    488 var regNames = [...]string{
    489 	AL:   "AL",
    490 	CL:   "CL",
    491 	BL:   "BL",
    492 	DL:   "DL",
    493 	AH:   "AH",
    494 	CH:   "CH",
    495 	BH:   "BH",
    496 	DH:   "DH",
    497 	SPB:  "SPB",
    498 	BPB:  "BPB",
    499 	SIB:  "SIB",
    500 	DIB:  "DIB",
    501 	R8B:  "R8B",
    502 	R9B:  "R9B",
    503 	R10B: "R10B",
    504 	R11B: "R11B",
    505 	R12B: "R12B",
    506 	R13B: "R13B",
    507 	R14B: "R14B",
    508 	R15B: "R15B",
    509 	AX:   "AX",
    510 	CX:   "CX",
    511 	BX:   "BX",
    512 	DX:   "DX",
    513 	SP:   "SP",
    514 	BP:   "BP",
    515 	SI:   "SI",
    516 	DI:   "DI",
    517 	R8W:  "R8W",
    518 	R9W:  "R9W",
    519 	R10W: "R10W",
    520 	R11W: "R11W",
    521 	R12W: "R12W",
    522 	R13W: "R13W",
    523 	R14W: "R14W",
    524 	R15W: "R15W",
    525 	EAX:  "EAX",
    526 	ECX:  "ECX",
    527 	EDX:  "EDX",
    528 	EBX:  "EBX",
    529 	ESP:  "ESP",
    530 	EBP:  "EBP",
    531 	ESI:  "ESI",
    532 	EDI:  "EDI",
    533 	R8L:  "R8L",
    534 	R9L:  "R9L",
    535 	R10L: "R10L",
    536 	R11L: "R11L",
    537 	R12L: "R12L",
    538 	R13L: "R13L",
    539 	R14L: "R14L",
    540 	R15L: "R15L",
    541 	RAX:  "RAX",
    542 	RCX:  "RCX",
    543 	RDX:  "RDX",
    544 	RBX:  "RBX",
    545 	RSP:  "RSP",
    546 	RBP:  "RBP",
    547 	RSI:  "RSI",
    548 	RDI:  "RDI",
    549 	R8:   "R8",
    550 	R9:   "R9",
    551 	R10:  "R10",
    552 	R11:  "R11",
    553 	R12:  "R12",
    554 	R13:  "R13",
    555 	R14:  "R14",
    556 	R15:  "R15",
    557 	IP:   "IP",
    558 	EIP:  "EIP",
    559 	RIP:  "RIP",
    560 	F0:   "F0",
    561 	F1:   "F1",
    562 	F2:   "F2",
    563 	F3:   "F3",
    564 	F4:   "F4",
    565 	F5:   "F5",
    566 	F6:   "F6",
    567 	F7:   "F7",
    568 	M0:   "M0",
    569 	M1:   "M1",
    570 	M2:   "M2",
    571 	M3:   "M3",
    572 	M4:   "M4",
    573 	M5:   "M5",
    574 	M6:   "M6",
    575 	M7:   "M7",
    576 	X0:   "X0",
    577 	X1:   "X1",
    578 	X2:   "X2",
    579 	X3:   "X3",
    580 	X4:   "X4",
    581 	X5:   "X5",
    582 	X6:   "X6",
    583 	X7:   "X7",
    584 	X8:   "X8",
    585 	X9:   "X9",
    586 	X10:  "X10",
    587 	X11:  "X11",
    588 	X12:  "X12",
    589 	X13:  "X13",
    590 	X14:  "X14",
    591 	X15:  "X15",
    592 	CS:   "CS",
    593 	SS:   "SS",
    594 	DS:   "DS",
    595 	ES:   "ES",
    596 	FS:   "FS",
    597 	GS:   "GS",
    598 	GDTR: "GDTR",
    599 	IDTR: "IDTR",
    600 	LDTR: "LDTR",
    601 	MSW:  "MSW",
    602 	TASK: "TASK",
    603 	CR0:  "CR0",
    604 	CR1:  "CR1",
    605 	CR2:  "CR2",
    606 	CR3:  "CR3",
    607 	CR4:  "CR4",
    608 	CR5:  "CR5",
    609 	CR6:  "CR6",
    610 	CR7:  "CR7",
    611 	CR8:  "CR8",
    612 	CR9:  "CR9",
    613 	CR10: "CR10",
    614 	CR11: "CR11",
    615 	CR12: "CR12",
    616 	CR13: "CR13",
    617 	CR14: "CR14",
    618 	CR15: "CR15",
    619 	DR0:  "DR0",
    620 	DR1:  "DR1",
    621 	DR2:  "DR2",
    622 	DR3:  "DR3",
    623 	DR4:  "DR4",
    624 	DR5:  "DR5",
    625 	DR6:  "DR6",
    626 	DR7:  "DR7",
    627 	DR8:  "DR8",
    628 	DR9:  "DR9",
    629 	DR10: "DR10",
    630 	DR11: "DR11",
    631 	DR12: "DR12",
    632 	DR13: "DR13",
    633 	DR14: "DR14",
    634 	DR15: "DR15",
    635 	TR0:  "TR0",
    636 	TR1:  "TR1",
    637 	TR2:  "TR2",
    638 	TR3:  "TR3",
    639 	TR4:  "TR4",
    640 	TR5:  "TR5",
    641 	TR6:  "TR6",
    642 	TR7:  "TR7",
    643 }
    644