Home | History | Annotate | Download | only in x86asm
      1 // Copyright 2014 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package x86asm implements decoding of x86 machine code.
      6 package x86asm
      7 
      8 import (
      9 	"bytes"
     10 	"fmt"
     11 )
     12 
// An Inst is a single instruction.
// Mode, AddrSize, and DataSize are measured in bits;
// MemBytes and Len are measured in bytes.
type Inst struct {
	Prefix   Prefixes // Prefixes applied to the instruction.
	Op       Op       // Opcode mnemonic
	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
	Args     Args     // Instruction arguments, in Intel order
	Mode     int      // processor mode in bits: 16, 32, or 64
	AddrSize int      // address size in bits: 16, 32, or 64
	DataSize int      // operand size in bits: 16, 32, or 64
	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
	Len      int      // length of encoded instruction in bytes
}
     25 
// Prefixes is an array of prefixes associated with a single instruction.
// The prefixes are listed in the same order as found in the instruction:
// each prefix byte corresponds to one slot in the array. The first zero
// in the array marks the end of the prefixes.
// The array has room for 14 entries.
type Prefixes [14]Prefix
     31 
// A Prefix represents an Intel instruction prefix.
// The low 8 bits are the actual prefix byte encoding,
// and the top 8 bits contain distinguishing bits and metadata.
// The metadata flags are PrefixImplicit, PrefixIgnored, and PrefixInvalid;
// the distinguishing bits (0x100, 0x200) separate named prefixes that
// share the same prefix byte, such as PrefixREPN and PrefixXACQUIRE.
type Prefix uint16
     36 
const (
	// Metadata about the role of a prefix in an instruction.
	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)

	// Memory segment overrides.
	PrefixES Prefix = 0x26 // ES segment override
	PrefixCS Prefix = 0x2E // CS segment override
	PrefixSS Prefix = 0x36 // SS segment override
	PrefixDS Prefix = 0x3E // DS segment override
	PrefixFS Prefix = 0x64 // FS segment override
	PrefixGS Prefix = 0x65 // GS segment override

	// Branch prediction.
	// These share their low byte with PrefixCS (0x2E) and PrefixDS (0x3E);
	// the 0x100 bit distinguishes the branch-hint meaning.
	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)

	// Size attributes.
	// The Data16/Data32 and Addr16/Addr32 values reuse the 0x66 and 0x67
	// prefix bytes with the distinguishing bits 0x100 and 0x200 set.
	// NOTE(review): presumably they record which size the override selected;
	// confirm against the decoder.
	PrefixDataSize Prefix = 0x66 // operand size override
	PrefixData16   Prefix = 0x166
	PrefixData32   Prefix = 0x266
	PrefixAddrSize Prefix = 0x67 // address size override
	PrefixAddr16   Prefix = 0x167
	PrefixAddr32   Prefix = 0x267

	// One of a kind.
	// PrefixXACQUIRE and PrefixBND share byte 0xF2 with PrefixREPN, and
	// PrefixXRELEASE shares byte 0xF3 with PrefixREP; the bits above the
	// low byte tell them apart.
	PrefixLOCK     Prefix = 0xF0 // lock
	PrefixREPN     Prefix = 0xF2 // repeat not zero
	PrefixXACQUIRE Prefix = 0x1F2
	PrefixBND      Prefix = 0x2F2
	PrefixREP      Prefix = 0xF3 // repeat
	PrefixXRELEASE Prefix = 0x1F3

	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
	// The other bits are set or not according to the intended use.
	// PrefixREXW, PrefixREXR, PrefixREXX, and PrefixREXB are bit masks
	// within the low four bits of a REX prefix, not prefixes themselves.
	PrefixREX  Prefix = 0x40 // REX 64-bit extension prefix
	PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
	PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
	PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
	PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
)
     79 
     80 // IsREX reports whether p is a REX prefix byte.
     81 func (p Prefix) IsREX() bool {
     82 	return p&0xF0 == PrefixREX
     83 }
     84 
     85 func (p Prefix) String() string {
     86 	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
     87 	if s := prefixNames[p]; s != "" {
     88 		return s
     89 	}
     90 
     91 	if p.IsREX() {
     92 		s := "REX."
     93 		if p&PrefixREXW != 0 {
     94 			s += "W"
     95 		}
     96 		if p&PrefixREXR != 0 {
     97 			s += "R"
     98 		}
     99 		if p&PrefixREXX != 0 {
    100 			s += "X"
    101 		}
    102 		if p&PrefixREXB != 0 {
    103 			s += "B"
    104 		}
    105 		return s
    106 	}
    107 
    108 	return fmt.Sprintf("Prefix(%#x)", int(p))
    109 }
    110 
    111 // An Op is an x86 opcode.
    112 type Op uint32
    113 
    114 func (op Op) String() string {
    115 	i := int(op)
    116 	if i < 0 || i >= len(opNames) || opNames[i] == "" {
    117 		return fmt.Sprintf("Op(%d)", i)
    118 	}
    119 	return opNames[i]
    120 }
    121 
// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
// Inst.String stops printing at the first nil element.
type Args [4]Arg
    126 
// An Arg is a single instruction argument,
// one of these types: Reg, Mem, Imm, Rel.
type Arg interface {
	// String returns the textual form of the argument.
	String() string
	// isArg is an unexported marker method implemented by
	// Reg, Mem, Imm, and Rel.
	isArg()
}
    133 
// Note that the implementations of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.
    137 
// A Reg is a single register.
// The zero Reg value has no name but indicates "no register."
type Reg uint8
    141 
// Register numbers. The values are assigned consecutively by iota, so
// the order of this list is significant: regBytes and isSegReg test
// register classes by range (for example AL..R15B or ES..GS), and
// regNames is indexed by these values.
const (
	// The blank identifier takes value 0, leaving the zero Reg
	// to mean "no register".
	_ Reg = iota

	// 8-bit
	AL
	CL
	DL
	BL
	AH
	CH
	DH
	BH
	SPB
	BPB
	SIB
	DIB
	R8B
	R9B
	R10B
	R11B
	R12B
	R13B
	R14B
	R15B

	// 16-bit
	AX
	CX
	DX
	BX
	SP
	BP
	SI
	DI
	R8W
	R9W
	R10W
	R11W
	R12W
	R13W
	R14W
	R15W

	// 32-bit
	EAX
	ECX
	EDX
	EBX
	ESP
	EBP
	ESI
	EDI
	R8L
	R9L
	R10L
	R11L
	R12L
	R13L
	R14L
	R15L

	// 64-bit
	RAX
	RCX
	RDX
	RBX
	RSP
	RBP
	RSI
	RDI
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	// Instruction pointer.
	IP  // 16-bit
	EIP // 32-bit
	RIP // 64-bit

	// 387 floating point registers.
	F0
	F1
	F2
	F3
	F4
	F5
	F6
	F7

	// MMX registers.
	M0
	M1
	M2
	M3
	M4
	M5
	M6
	M7

	// XMM registers.
	X0
	X1
	X2
	X3
	X4
	X5
	X6
	X7
	X8
	X9
	X10
	X11
	X12
	X13
	X14
	X15

	// Segment registers.
	ES
	CS
	SS
	DS
	FS
	GS

	// System registers.
	GDTR
	IDTR
	LDTR
	MSW
	TASK

	// Control registers.
	CR0
	CR1
	CR2
	CR3
	CR4
	CR5
	CR6
	CR7
	CR8
	CR9
	CR10
	CR11
	CR12
	CR13
	CR14
	CR15

	// Debug registers.
	DR0
	DR1
	DR2
	DR3
	DR4
	DR5
	DR6
	DR7
	DR8
	DR9
	DR10
	DR11
	DR12
	DR13
	DR14
	DR15

	// Task registers.
	TR0
	TR1
	TR2
	TR3
	TR4
	TR5
	TR6
	TR7
)
    325 
// regMax is TR7, the last register declared in the Reg constant list.
const regMax = TR7
    327 
    328 func (Reg) isArg() {}
    329 
    330 func (r Reg) String() string {
    331 	i := int(r)
    332 	if i < 0 || i >= len(regNames) || regNames[i] == "" {
    333 		return fmt.Sprintf("Reg(%d)", i)
    334 	}
    335 	return regNames[i]
    336 }
    337 
// A Mem is a memory reference.
// The general form is Segment:[Base+Scale*Index+Disp].
type Mem struct {
	Segment Reg   // segment register; zero means none
	Base    Reg   // base register; zero means none
	Scale   uint8 // multiplier applied to Index; String prints no index term when it is zero
	Index   Reg   // index register
	Disp    int64 // displacement
}

// isArg marks Mem as an implementation of Arg.
func (Mem) isArg() {}
    349 
    350 func (m Mem) String() string {
    351 	var base, plus, scale, index, disp string
    352 
    353 	if m.Base != 0 {
    354 		base = m.Base.String()
    355 	}
    356 	if m.Scale != 0 {
    357 		if m.Base != 0 {
    358 			plus = "+"
    359 		}
    360 		if m.Scale > 1 {
    361 			scale = fmt.Sprintf("%d*", m.Scale)
    362 		}
    363 		index = m.Index.String()
    364 	}
    365 	if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
    366 		disp = fmt.Sprintf("%+#x", m.Disp)
    367 	}
    368 	return "[" + base + plus + scale + index + disp + "]"
    369 }
    370 
    371 // A Rel is an offset relative to the current instruction pointer.
    372 type Rel int32
    373 
    374 func (Rel) isArg() {}
    375 
    376 func (r Rel) String() string {
    377 	return fmt.Sprintf(".%+d", r)
    378 }
    379 
    380 // An Imm is an integer constant.
    381 type Imm int64
    382 
    383 func (Imm) isArg() {}
    384 
    385 func (i Imm) String() string {
    386 	return fmt.Sprintf("%#x", int64(i))
    387 }
    388 
    389 func (i Inst) String() string {
    390 	var buf bytes.Buffer
    391 	for _, p := range i.Prefix {
    392 		if p == 0 {
    393 			break
    394 		}
    395 		if p&PrefixImplicit != 0 {
    396 			continue
    397 		}
    398 		fmt.Fprintf(&buf, "%v ", p)
    399 	}
    400 	fmt.Fprintf(&buf, "%v", i.Op)
    401 	sep := " "
    402 	for _, v := range i.Args {
    403 		if v == nil {
    404 			break
    405 		}
    406 		fmt.Fprintf(&buf, "%s%v", sep, v)
    407 		sep = ", "
    408 	}
    409 	return buf.String()
    410 }
    411 
    412 func isReg(a Arg) bool {
    413 	_, ok := a.(Reg)
    414 	return ok
    415 }
    416 
    417 func isSegReg(a Arg) bool {
    418 	r, ok := a.(Reg)
    419 	return ok && ES <= r && r <= GS
    420 }
    421 
    422 func isMem(a Arg) bool {
    423 	_, ok := a.(Mem)
    424 	return ok
    425 }
    426 
    427 func isImm(a Arg) bool {
    428 	_, ok := a.(Imm)
    429 	return ok
    430 }
    431 
    432 func regBytes(a Arg) int {
    433 	r, ok := a.(Reg)
    434 	if !ok {
    435 		return 0
    436 	}
    437 	if AL <= r && r <= R15B {
    438 		return 1
    439 	}
    440 	if AX <= r && r <= R15W {
    441 		return 2
    442 	}
    443 	if EAX <= r && r <= R15L {
    444 		return 4
    445 	}
    446 	if RAX <= r && r <= R15 {
    447 		return 8
    448 	}
    449 	return 0
    450 }
    451 
    452 func isSegment(p Prefix) bool {
    453 	switch p {
    454 	case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
    455 		return true
    456 	}
    457 	return false
    458 }
    459 
    460 // The Op definitions and string list are in tables.go.
    461 
    462 var prefixNames = map[Prefix]string{
    463 	PrefixCS:       "CS",
    464 	PrefixDS:       "DS",
    465 	PrefixES:       "ES",
    466 	PrefixFS:       "FS",
    467 	PrefixGS:       "GS",
    468 	PrefixSS:       "SS",
    469 	PrefixLOCK:     "LOCK",
    470 	PrefixREP:      "REP",
    471 	PrefixREPN:     "REPN",
    472 	PrefixAddrSize: "ADDRSIZE",
    473 	PrefixDataSize: "DATASIZE",
    474 	PrefixAddr16:   "ADDR16",
    475 	PrefixData16:   "DATA16",
    476 	PrefixAddr32:   "ADDR32",
    477 	PrefixData32:   "DATA32",
    478 	PrefixBND:      "BND",
    479 	PrefixXACQUIRE: "XACQUIRE",
    480 	PrefixXRELEASE: "XRELEASE",
    481 	PrefixREX:      "REX",
    482 	PrefixPT:       "PT",
    483 	PrefixPN:       "PN",
    484 }
    485 
    486 var regNames = [...]string{
    487 	AL:   "AL",
    488 	CL:   "CL",
    489 	BL:   "BL",
    490 	DL:   "DL",
    491 	AH:   "AH",
    492 	CH:   "CH",
    493 	BH:   "BH",
    494 	DH:   "DH",
    495 	SPB:  "SPB",
    496 	BPB:  "BPB",
    497 	SIB:  "SIB",
    498 	DIB:  "DIB",
    499 	R8B:  "R8B",
    500 	R9B:  "R9B",
    501 	R10B: "R10B",
    502 	R11B: "R11B",
    503 	R12B: "R12B",
    504 	R13B: "R13B",
    505 	R14B: "R14B",
    506 	R15B: "R15B",
    507 	AX:   "AX",
    508 	CX:   "CX",
    509 	BX:   "BX",
    510 	DX:   "DX",
    511 	SP:   "SP",
    512 	BP:   "BP",
    513 	SI:   "SI",
    514 	DI:   "DI",
    515 	R8W:  "R8W",
    516 	R9W:  "R9W",
    517 	R10W: "R10W",
    518 	R11W: "R11W",
    519 	R12W: "R12W",
    520 	R13W: "R13W",
    521 	R14W: "R14W",
    522 	R15W: "R15W",
    523 	EAX:  "EAX",
    524 	ECX:  "ECX",
    525 	EDX:  "EDX",
    526 	EBX:  "EBX",
    527 	ESP:  "ESP",
    528 	EBP:  "EBP",
    529 	ESI:  "ESI",
    530 	EDI:  "EDI",
    531 	R8L:  "R8L",
    532 	R9L:  "R9L",
    533 	R10L: "R10L",
    534 	R11L: "R11L",
    535 	R12L: "R12L",
    536 	R13L: "R13L",
    537 	R14L: "R14L",
    538 	R15L: "R15L",
    539 	RAX:  "RAX",
    540 	RCX:  "RCX",
    541 	RDX:  "RDX",
    542 	RBX:  "RBX",
    543 	RSP:  "RSP",
    544 	RBP:  "RBP",
    545 	RSI:  "RSI",
    546 	RDI:  "RDI",
    547 	R8:   "R8",
    548 	R9:   "R9",
    549 	R10:  "R10",
    550 	R11:  "R11",
    551 	R12:  "R12",
    552 	R13:  "R13",
    553 	R14:  "R14",
    554 	R15:  "R15",
    555 	IP:   "IP",
    556 	EIP:  "EIP",
    557 	RIP:  "RIP",
    558 	F0:   "F0",
    559 	F1:   "F1",
    560 	F2:   "F2",
    561 	F3:   "F3",
    562 	F4:   "F4",
    563 	F5:   "F5",
    564 	F6:   "F6",
    565 	F7:   "F7",
    566 	M0:   "M0",
    567 	M1:   "M1",
    568 	M2:   "M2",
    569 	M3:   "M3",
    570 	M4:   "M4",
    571 	M5:   "M5",
    572 	M6:   "M6",
    573 	M7:   "M7",
    574 	X0:   "X0",
    575 	X1:   "X1",
    576 	X2:   "X2",
    577 	X3:   "X3",
    578 	X4:   "X4",
    579 	X5:   "X5",
    580 	X6:   "X6",
    581 	X7:   "X7",
    582 	X8:   "X8",
    583 	X9:   "X9",
    584 	X10:  "X10",
    585 	X11:  "X11",
    586 	X12:  "X12",
    587 	X13:  "X13",
    588 	X14:  "X14",
    589 	X15:  "X15",
    590 	CS:   "CS",
    591 	SS:   "SS",
    592 	DS:   "DS",
    593 	ES:   "ES",
    594 	FS:   "FS",
    595 	GS:   "GS",
    596 	GDTR: "GDTR",
    597 	IDTR: "IDTR",
    598 	LDTR: "LDTR",
    599 	MSW:  "MSW",
    600 	TASK: "TASK",
    601 	CR0:  "CR0",
    602 	CR1:  "CR1",
    603 	CR2:  "CR2",
    604 	CR3:  "CR3",
    605 	CR4:  "CR4",
    606 	CR5:  "CR5",
    607 	CR6:  "CR6",
    608 	CR7:  "CR7",
    609 	CR8:  "CR8",
    610 	CR9:  "CR9",
    611 	CR10: "CR10",
    612 	CR11: "CR11",
    613 	CR12: "CR12",
    614 	CR13: "CR13",
    615 	CR14: "CR14",
    616 	CR15: "CR15",
    617 	DR0:  "DR0",
    618 	DR1:  "DR1",
    619 	DR2:  "DR2",
    620 	DR3:  "DR3",
    621 	DR4:  "DR4",
    622 	DR5:  "DR5",
    623 	DR6:  "DR6",
    624 	DR7:  "DR7",
    625 	DR8:  "DR8",
    626 	DR9:  "DR9",
    627 	DR10: "DR10",
    628 	DR11: "DR11",
    629 	DR12: "DR12",
    630 	DR13: "DR13",
    631 	DR14: "DR14",
    632 	DR15: "DR15",
    633 	TR0:  "TR0",
    634 	TR1:  "TR1",
    635 	TR2:  "TR2",
    636 	TR3:  "TR3",
    637 	TR4:  "TR4",
    638 	TR5:  "TR5",
    639 	TR6:  "TR6",
    640 	TR7:  "TR7",
    641 }
    642