Home | History | Annotate | Download | only in x86asm
      1 // Copyright 2014 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package x86asm implements decoding of x86 machine code.
      6 package x86asm
      7 
      8 import (
      9 	"bytes"
     10 	"fmt"
     11 )
     12 
// An Inst is a single decoded instruction.
// Its String method prints the non-implicit prefixes, the opcode
// mnemonic and the arguments on one line.
type Inst struct {
	Prefix   Prefixes // Prefixes applied to the instruction; the first zero entry ends the list.
	Op       Op       // Opcode mnemonic
	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
	Args     Args     // Instruction arguments, in Intel order; unused trailing slots are nil
	Mode     int      // processor mode in bits: 16, 32, or 64
	AddrSize int      // address size in bits: 16, 32, or 64
	DataSize int      // operand size in bits: 16, 32, or 64
	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
	Len      int      // length of encoded instruction in bytes
	PCRel    int      // length of PC-relative address in instruction encoding
	PCRelOff int      // index of start of PC-relative address in instruction encoding
}
     27 
// Prefixes is an array of prefixes associated with a single instruction.
// The prefixes are listed in the same order as found in the instruction:
// each prefix byte corresponds to one slot in the array. The first zero
// in the array marks the end of the prefixes; Inst.String stops reading
// the array at that entry.
type Prefixes [14]Prefix
     33 
// A Prefix represents an Intel instruction prefix.
// The low 8 bits are the actual prefix byte encoding,
// and the top 8 bits contain distinguishing bits and metadata:
// values such as 0x100 and 0x200 tell apart different readings of the
// same byte (for example PrefixREPN, PrefixXACQUIRE and PrefixBND all
// share byte 0xF2), while PrefixImplicit, PrefixIgnored and
// PrefixInvalid record the role the prefix plays in an instruction.
type Prefix uint16
     38 
// Prefix values. Constants that share a low byte are told apart by
// the bits above it; Prefix.String looks the full value up in
// prefixNames after clearing the three metadata bits below.
const (
	// Metadata about the role of a prefix in an instruction.
	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)

	// Memory segment overrides.
	PrefixES Prefix = 0x26 // ES segment override
	PrefixCS Prefix = 0x2E // CS segment override
	PrefixSS Prefix = 0x36 // SS segment override
	PrefixDS Prefix = 0x3E // DS segment override
	PrefixFS Prefix = 0x64 // FS segment override
	PrefixGS Prefix = 0x65 // GS segment override

	// Branch prediction.
	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)

	// Size attributes.
	PrefixDataSize Prefix = 0x66 // operand size override
	PrefixData16   Prefix = 0x166 // byte 0x66 tagged 0x100; printed as DATA16
	PrefixData32   Prefix = 0x266 // byte 0x66 tagged 0x200; printed as DATA32
	PrefixAddrSize Prefix = 0x67 // address size override
	PrefixAddr16   Prefix = 0x167 // byte 0x67 tagged 0x100; printed as ADDR16
	PrefixAddr32   Prefix = 0x267 // byte 0x67 tagged 0x200; printed as ADDR32

	// One of a kind.
	PrefixLOCK     Prefix = 0xF0 // lock
	PrefixREPN     Prefix = 0xF2 // repeat not zero
	PrefixXACQUIRE Prefix = 0x1F2 // byte 0xF2 tagged 0x100; printed as XACQUIRE
	PrefixBND      Prefix = 0x2F2 // byte 0xF2 tagged 0x200; printed as BND
	PrefixREP      Prefix = 0xF3 // repeat
	PrefixXRELEASE Prefix = 0x1F3 // byte 0xF3 tagged 0x100; printed as XRELEASE

	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
	// The other bits are set or not according to the intended use.
	// PrefixREXW, PrefixREXR, PrefixREXX and PrefixREXB are bit masks
	// tested against a REX prefix (see Prefix.String), not prefixes
	// on their own.
	PrefixREX       Prefix = 0x40 // REX 64-bit extension prefix
	PrefixREXW      Prefix = 0x08 // extension bit W (64-bit instruction width)
	PrefixREXR      Prefix = 0x04 // extension bit R (r field in modrm)
	PrefixREXX      Prefix = 0x02 // extension bit X (index field in sib)
	PrefixREXB      Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
	PrefixVEX2Bytes Prefix = 0xC5 // Short form of vex prefix
	PrefixVEX3Bytes Prefix = 0xC4 // Long form of vex prefix
)
     83 
     84 // IsREX reports whether p is a REX prefix byte.
     85 func (p Prefix) IsREX() bool {
     86 	return p&0xF0 == PrefixREX
     87 }
     88 
     89 func (p Prefix) IsVEX() bool {
     90 	return p&0xFF == PrefixVEX2Bytes || p&0xFF == PrefixVEX3Bytes
     91 }
     92 
     93 func (p Prefix) String() string {
     94 	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
     95 	if s := prefixNames[p]; s != "" {
     96 		return s
     97 	}
     98 
     99 	if p.IsREX() {
    100 		s := "REX."
    101 		if p&PrefixREXW != 0 {
    102 			s += "W"
    103 		}
    104 		if p&PrefixREXR != 0 {
    105 			s += "R"
    106 		}
    107 		if p&PrefixREXX != 0 {
    108 			s += "X"
    109 		}
    110 		if p&PrefixREXB != 0 {
    111 			s += "B"
    112 		}
    113 		return s
    114 	}
    115 
    116 	return fmt.Sprintf("Prefix(%#x)", int(p))
    117 }
    118 
    119 // An Op is an x86 opcode.
    120 type Op uint32
    121 
    122 func (op Op) String() string {
    123 	i := int(op)
    124 	if i < 0 || i >= len(opNames) || opNames[i] == "" {
    125 		return fmt.Sprintf("Op(%d)", i)
    126 	}
    127 	return opNames[i]
    128 }
    129 
// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
// Inst.String stops printing at the first nil element.
type Args [4]Arg
    134 
// An Arg is a single instruction argument,
// one of these types: Reg, Mem, Imm, Rel.
// String returns the textual form of the argument. isArg is an
// unexported marker method that each of those types implements with
// an empty body.
type Arg interface {
	String() string
	isArg()
}
    141 
// Note that the implementations of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.
    145 
// A Reg is a single register.
// The zero Reg value has no name but indicates "no register."
// Its String method prints it as "Reg(0)".
type Reg uint8
    149 
// Register identifiers, numbered consecutively from 1 by iota; the
// value 0 is skipped with the blank identifier so that the zero Reg
// stays unnamed.
//
// The declaration order is significant: isSegReg and regBytes classify
// a register by comparing it against the first and last constant of a
// group (for example AL..R15B or ES..GS), so every group must remain
// contiguous and in this order.
const (
	_ Reg = iota

	// 8-bit
	AL
	CL
	DL
	BL
	AH
	CH
	DH
	BH
	SPB
	BPB
	SIB
	DIB
	R8B
	R9B
	R10B
	R11B
	R12B
	R13B
	R14B
	R15B

	// 16-bit
	AX
	CX
	DX
	BX
	SP
	BP
	SI
	DI
	R8W
	R9W
	R10W
	R11W
	R12W
	R13W
	R14W
	R15W

	// 32-bit
	EAX
	ECX
	EDX
	EBX
	ESP
	EBP
	ESI
	EDI
	R8L
	R9L
	R10L
	R11L
	R12L
	R13L
	R14L
	R15L

	// 64-bit
	RAX
	RCX
	RDX
	RBX
	RSP
	RBP
	RSI
	RDI
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	// Instruction pointer.
	IP  // 16-bit
	EIP // 32-bit
	RIP // 64-bit

	// 387 floating point registers.
	F0
	F1
	F2
	F3
	F4
	F5
	F6
	F7

	// MMX registers.
	M0
	M1
	M2
	M3
	M4
	M5
	M6
	M7

	// XMM registers.
	X0
	X1
	X2
	X3
	X4
	X5
	X6
	X7
	X8
	X9
	X10
	X11
	X12
	X13
	X14
	X15

	// Segment registers.
	ES
	CS
	SS
	DS
	FS
	GS

	// System registers.
	GDTR
	IDTR
	LDTR
	MSW
	TASK

	// Control registers.
	CR0
	CR1
	CR2
	CR3
	CR4
	CR5
	CR6
	CR7
	CR8
	CR9
	CR10
	CR11
	CR12
	CR13
	CR14
	CR15

	// Debug registers.
	DR0
	DR1
	DR2
	DR3
	DR4
	DR5
	DR6
	DR7
	DR8
	DR9
	DR10
	DR11
	DR12
	DR13
	DR14
	DR15

	// Task registers.
	// NOTE(review): TR0-TR7 are usually called "test registers" in x86
	// documentation; confirm the intended wording.
	TR0
	TR1
	TR2
	TR3
	TR4
	TR5
	TR6
	TR7
)
    333 
// regMax is the largest Reg value declared above (TR7 is the last
// constant in the register list).
const regMax = TR7
    335 
    336 func (Reg) isArg() {}
    337 
    338 func (r Reg) String() string {
    339 	i := int(r)
    340 	if i < 0 || i >= len(regNames) || regNames[i] == "" {
    341 		return fmt.Sprintf("Reg(%d)", i)
    342 	}
    343 	return regNames[i]
    344 }
    345 
    346 // A Mem is a memory reference.
    347 // The general form is Segment:[Base+Scale*Index+Disp].
    348 type Mem struct {
    349 	Segment Reg
    350 	Base    Reg
    351 	Scale   uint8
    352 	Index   Reg
    353 	Disp    int64
    354 }
    355 
    356 func (Mem) isArg() {}
    357 
    358 func (m Mem) String() string {
    359 	var base, plus, scale, index, disp string
    360 
    361 	if m.Base != 0 {
    362 		base = m.Base.String()
    363 	}
    364 	if m.Scale != 0 {
    365 		if m.Base != 0 {
    366 			plus = "+"
    367 		}
    368 		if m.Scale > 1 {
    369 			scale = fmt.Sprintf("%d*", m.Scale)
    370 		}
    371 		index = m.Index.String()
    372 	}
    373 	if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
    374 		disp = fmt.Sprintf("%+#x", m.Disp)
    375 	}
    376 	return "[" + base + plus + scale + index + disp + "]"
    377 }
    378 
    379 // A Rel is an offset relative to the current instruction pointer.
    380 type Rel int32
    381 
    382 func (Rel) isArg() {}
    383 
    384 func (r Rel) String() string {
    385 	return fmt.Sprintf(".%+d", r)
    386 }
    387 
    388 // An Imm is an integer constant.
    389 type Imm int64
    390 
    391 func (Imm) isArg() {}
    392 
    393 func (i Imm) String() string {
    394 	return fmt.Sprintf("%#x", int64(i))
    395 }
    396 
    397 func (i Inst) String() string {
    398 	var buf bytes.Buffer
    399 	for _, p := range i.Prefix {
    400 		if p == 0 {
    401 			break
    402 		}
    403 		if p&PrefixImplicit != 0 {
    404 			continue
    405 		}
    406 		fmt.Fprintf(&buf, "%v ", p)
    407 	}
    408 	fmt.Fprintf(&buf, "%v", i.Op)
    409 	sep := " "
    410 	for _, v := range i.Args {
    411 		if v == nil {
    412 			break
    413 		}
    414 		fmt.Fprintf(&buf, "%s%v", sep, v)
    415 		sep = ", "
    416 	}
    417 	return buf.String()
    418 }
    419 
    420 func isReg(a Arg) bool {
    421 	_, ok := a.(Reg)
    422 	return ok
    423 }
    424 
    425 func isSegReg(a Arg) bool {
    426 	r, ok := a.(Reg)
    427 	return ok && ES <= r && r <= GS
    428 }
    429 
    430 func isMem(a Arg) bool {
    431 	_, ok := a.(Mem)
    432 	return ok
    433 }
    434 
    435 func isImm(a Arg) bool {
    436 	_, ok := a.(Imm)
    437 	return ok
    438 }
    439 
    440 func regBytes(a Arg) int {
    441 	r, ok := a.(Reg)
    442 	if !ok {
    443 		return 0
    444 	}
    445 	if AL <= r && r <= R15B {
    446 		return 1
    447 	}
    448 	if AX <= r && r <= R15W {
    449 		return 2
    450 	}
    451 	if EAX <= r && r <= R15L {
    452 		return 4
    453 	}
    454 	if RAX <= r && r <= R15 {
    455 		return 8
    456 	}
    457 	return 0
    458 }
    459 
    460 func isSegment(p Prefix) bool {
    461 	switch p {
    462 	case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
    463 		return true
    464 	}
    465 	return false
    466 }
    467 
    468 // The Op definitions and string list are in tables.go.
    469 
    470 var prefixNames = map[Prefix]string{
    471 	PrefixCS:       "CS",
    472 	PrefixDS:       "DS",
    473 	PrefixES:       "ES",
    474 	PrefixFS:       "FS",
    475 	PrefixGS:       "GS",
    476 	PrefixSS:       "SS",
    477 	PrefixLOCK:     "LOCK",
    478 	PrefixREP:      "REP",
    479 	PrefixREPN:     "REPN",
    480 	PrefixAddrSize: "ADDRSIZE",
    481 	PrefixDataSize: "DATASIZE",
    482 	PrefixAddr16:   "ADDR16",
    483 	PrefixData16:   "DATA16",
    484 	PrefixAddr32:   "ADDR32",
    485 	PrefixData32:   "DATA32",
    486 	PrefixBND:      "BND",
    487 	PrefixXACQUIRE: "XACQUIRE",
    488 	PrefixXRELEASE: "XRELEASE",
    489 	PrefixREX:      "REX",
    490 	PrefixPT:       "PT",
    491 	PrefixPN:       "PN",
    492 }
    493 
    494 var regNames = [...]string{
    495 	AL:   "AL",
    496 	CL:   "CL",
    497 	BL:   "BL",
    498 	DL:   "DL",
    499 	AH:   "AH",
    500 	CH:   "CH",
    501 	BH:   "BH",
    502 	DH:   "DH",
    503 	SPB:  "SPB",
    504 	BPB:  "BPB",
    505 	SIB:  "SIB",
    506 	DIB:  "DIB",
    507 	R8B:  "R8B",
    508 	R9B:  "R9B",
    509 	R10B: "R10B",
    510 	R11B: "R11B",
    511 	R12B: "R12B",
    512 	R13B: "R13B",
    513 	R14B: "R14B",
    514 	R15B: "R15B",
    515 	AX:   "AX",
    516 	CX:   "CX",
    517 	BX:   "BX",
    518 	DX:   "DX",
    519 	SP:   "SP",
    520 	BP:   "BP",
    521 	SI:   "SI",
    522 	DI:   "DI",
    523 	R8W:  "R8W",
    524 	R9W:  "R9W",
    525 	R10W: "R10W",
    526 	R11W: "R11W",
    527 	R12W: "R12W",
    528 	R13W: "R13W",
    529 	R14W: "R14W",
    530 	R15W: "R15W",
    531 	EAX:  "EAX",
    532 	ECX:  "ECX",
    533 	EDX:  "EDX",
    534 	EBX:  "EBX",
    535 	ESP:  "ESP",
    536 	EBP:  "EBP",
    537 	ESI:  "ESI",
    538 	EDI:  "EDI",
    539 	R8L:  "R8L",
    540 	R9L:  "R9L",
    541 	R10L: "R10L",
    542 	R11L: "R11L",
    543 	R12L: "R12L",
    544 	R13L: "R13L",
    545 	R14L: "R14L",
    546 	R15L: "R15L",
    547 	RAX:  "RAX",
    548 	RCX:  "RCX",
    549 	RDX:  "RDX",
    550 	RBX:  "RBX",
    551 	RSP:  "RSP",
    552 	RBP:  "RBP",
    553 	RSI:  "RSI",
    554 	RDI:  "RDI",
    555 	R8:   "R8",
    556 	R9:   "R9",
    557 	R10:  "R10",
    558 	R11:  "R11",
    559 	R12:  "R12",
    560 	R13:  "R13",
    561 	R14:  "R14",
    562 	R15:  "R15",
    563 	IP:   "IP",
    564 	EIP:  "EIP",
    565 	RIP:  "RIP",
    566 	F0:   "F0",
    567 	F1:   "F1",
    568 	F2:   "F2",
    569 	F3:   "F3",
    570 	F4:   "F4",
    571 	F5:   "F5",
    572 	F6:   "F6",
    573 	F7:   "F7",
    574 	M0:   "M0",
    575 	M1:   "M1",
    576 	M2:   "M2",
    577 	M3:   "M3",
    578 	M4:   "M4",
    579 	M5:   "M5",
    580 	M6:   "M6",
    581 	M7:   "M7",
    582 	X0:   "X0",
    583 	X1:   "X1",
    584 	X2:   "X2",
    585 	X3:   "X3",
    586 	X4:   "X4",
    587 	X5:   "X5",
    588 	X6:   "X6",
    589 	X7:   "X7",
    590 	X8:   "X8",
    591 	X9:   "X9",
    592 	X10:  "X10",
    593 	X11:  "X11",
    594 	X12:  "X12",
    595 	X13:  "X13",
    596 	X14:  "X14",
    597 	X15:  "X15",
    598 	CS:   "CS",
    599 	SS:   "SS",
    600 	DS:   "DS",
    601 	ES:   "ES",
    602 	FS:   "FS",
    603 	GS:   "GS",
    604 	GDTR: "GDTR",
    605 	IDTR: "IDTR",
    606 	LDTR: "LDTR",
    607 	MSW:  "MSW",
    608 	TASK: "TASK",
    609 	CR0:  "CR0",
    610 	CR1:  "CR1",
    611 	CR2:  "CR2",
    612 	CR3:  "CR3",
    613 	CR4:  "CR4",
    614 	CR5:  "CR5",
    615 	CR6:  "CR6",
    616 	CR7:  "CR7",
    617 	CR8:  "CR8",
    618 	CR9:  "CR9",
    619 	CR10: "CR10",
    620 	CR11: "CR11",
    621 	CR12: "CR12",
    622 	CR13: "CR13",
    623 	CR14: "CR14",
    624 	CR15: "CR15",
    625 	DR0:  "DR0",
    626 	DR1:  "DR1",
    627 	DR2:  "DR2",
    628 	DR3:  "DR3",
    629 	DR4:  "DR4",
    630 	DR5:  "DR5",
    631 	DR6:  "DR6",
    632 	DR7:  "DR7",
    633 	DR8:  "DR8",
    634 	DR9:  "DR9",
    635 	DR10: "DR10",
    636 	DR11: "DR11",
    637 	DR12: "DR12",
    638 	DR13: "DR13",
    639 	DR14: "DR14",
    640 	DR15: "DR15",
    641 	TR0:  "TR0",
    642 	TR1:  "TR1",
    643 	TR2:  "TR2",
    644 	TR3:  "TR3",
    645 	TR4:  "TR4",
    646 	TR5:  "TR5",
    647 	TR6:  "TR6",
    648 	TR7:  "TR7",
    649 }
    650