Home | History | Annotate | Download | only in x86asm
      1 // Copyright 2014 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Table-driven decoding of x86 instructions.
      6 
      7 package x86asm
      8 
      9 import (
     10 	"encoding/binary"
     11 	"errors"
     12 	"fmt"
     13 	"runtime"
     14 )
     15 
     16 // Set trace to true to cause the decoder to print the PC sequence
     17 // of the executed instruction codes. This is typically only useful
     18 // when you are running a test of a single input case.
     19 const trace = false
     20 
     21 // A decodeOp is a single instruction in the decoder bytecode program.
     22 //
     23 // The decodeOps correspond to consuming and conditionally branching
     24 // on input bytes, consuming additional fields, and then interpreting
     25 // consumed data as instruction arguments. The names of the xRead and xArg
     26 // operations are taken from the Intel manual conventions, for example
     27 // Volume 2, Section 3.1.1, page 487 of
     28 // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
     29 //
     30 // The actual decoding program is generated by ../x86map.
     31 //
     32 // TODO(rsc): We may be able to merge various of the memory operands
     33 // since we don't care about, say, the distinction between m80dec and m80bcd.
     34 // Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
     35 
     36 type decodeOp uint16 // opcode of one decoder bytecode instruction; values are the x* constants below
     37 
     38 const (
     39 	xFail  decodeOp = iota // invalid instruction (return)
     40 	xMatch                 // completed match
     41 	xJump                  // jump to pc
     42 
     43 	xCondByte     // switch on instruction byte value
     44 	xCondSlashR   // read and switch on instruction /r value
     45 	xCondPrefix   // switch on presence of instruction prefix
     46 	xCondIs64     // switch on 64-bit processor mode
     47 	xCondDataSize // switch on operand size
     48 	xCondAddrSize // switch on address size
     49 	xCondIsMem    // switch on memory vs register argument
     50 
     51 	xSetOp // set instruction opcode
     52 
     53 	xReadSlashR // read /r
     54 	xReadIb     // read ib
     55 	xReadIw     // read iw
     56 	xReadId     // read id
     57 	xReadIo     // read io
     58 	xReadCb     // read cb
     59 	xReadCw     // read cw
     60 	xReadCd     // read cd
     61 	xReadCp     // read cp
     62 	xReadCm     // read cm
     63 
     64 	xArg1            // arg 1
     65 	xArg3            // arg 3
     66 	xArgAL           // arg AL
     67 	xArgAX           // arg AX
     68 	xArgCL           // arg CL
     69 	xArgCR0dashCR7   // arg CR0-CR7
     70 	xArgCS           // arg CS
     71 	xArgDR0dashDR7   // arg DR0-DR7
     72 	xArgDS           // arg DS
     73 	xArgDX           // arg DX
     74 	xArgEAX          // arg EAX
     75 	xArgEDX          // arg EDX
     76 	xArgES           // arg ES
     77 	xArgFS           // arg FS
     78 	xArgGS           // arg GS
     79 	xArgImm16        // arg imm16
     80 	xArgImm32        // arg imm32
     81 	xArgImm64        // arg imm64
     82 	xArgImm8         // arg imm8
     83 	xArgImm8u        // arg imm8 but record as unsigned
     84 	xArgImm16u       // arg imm16 but record as unsigned
     85 	xArgM            // arg m
     86 	xArgM128         // arg m128
     87 	xArgM1428byte    // arg m14/28byte
     88 	xArgM16          // arg m16
     89 	xArgM16and16     // arg m16&16
     90 	xArgM16and32     // arg m16&32
     91 	xArgM16and64     // arg m16&64
     92 	xArgM16colon16   // arg m16:16
     93 	xArgM16colon32   // arg m16:32
     94 	xArgM16colon64   // arg m16:64
     95 	xArgM16int       // arg m16int
     96 	xArgM2byte       // arg m2byte
     97 	xArgM32          // arg m32
     98 	xArgM32and32     // arg m32&32
     99 	xArgM32fp        // arg m32fp
    100 	xArgM32int       // arg m32int
    101 	xArgM512byte     // arg m512byte
    102 	xArgM64          // arg m64
    103 	xArgM64fp        // arg m64fp
    104 	xArgM64int       // arg m64int
    105 	xArgM8           // arg m8
    106 	xArgM80bcd       // arg m80bcd
    107 	xArgM80dec       // arg m80dec
    108 	xArgM80fp        // arg m80fp
    109 	xArgM94108byte   // arg m94/108byte
    110 	xArgMm           // arg mm
    111 	xArgMm1          // arg mm1
    112 	xArgMm2          // arg mm2
    113 	xArgMm2M64       // arg mm2/m64
    114 	xArgMmM32        // arg mm/m32
    115 	xArgMmM64        // arg mm/m64
    116 	xArgMem          // arg mem
    117 	xArgMoffs16      // arg moffs16
    118 	xArgMoffs32      // arg moffs32
    119 	xArgMoffs64      // arg moffs64
    120 	xArgMoffs8       // arg moffs8
    121 	xArgPtr16colon16 // arg ptr16:16
    122 	xArgPtr16colon32 // arg ptr16:32
    123 	xArgR16          // arg r16
    124 	xArgR16op        // arg r16 with +rw in opcode
    125 	xArgR32          // arg r32
    126 	xArgR32M16       // arg r32/m16
    127 	xArgR32M8        // arg r32/m8
    128 	xArgR32op        // arg r32 with +rd in opcode
    129 	xArgR64          // arg r64
    130 	xArgR64M16       // arg r64/m16
    131 	xArgR64op        // arg r64 with +rd in opcode
    132 	xArgR8           // arg r8
    133 	xArgR8op         // arg r8 with +rb in opcode
    134 	xArgRAX          // arg RAX
    135 	xArgRDX          // arg RDX
    136 	xArgRM           // arg r/m
    137 	xArgRM16         // arg r/m16
    138 	xArgRM32         // arg r/m32
    139 	xArgRM64         // arg r/m64
    140 	xArgRM8          // arg r/m8
    141 	xArgReg          // arg reg
    142 	xArgRegM16       // arg reg/m16
    143 	xArgRegM32       // arg reg/m32
    144 	xArgRegM8        // arg reg/m8
    145 	xArgRel16        // arg rel16
    146 	xArgRel32        // arg rel32
    147 	xArgRel8         // arg rel8
    148 	xArgSS           // arg SS
    149 	xArgST           // arg ST, aka ST(0)
    150 	xArgSTi          // arg ST(i) with +i in opcode
    151 	xArgSreg         // arg Sreg
    152 	xArgTR0dashTR7   // arg TR0-TR7
    153 	xArgXmm          // arg xmm
    154 	xArgXMM0         // arg <XMM0>
    155 	xArgXmm1         // arg xmm1
    156 	xArgXmm2         // arg xmm2
    157 	xArgXmm2M128     // arg xmm2/m128
    158 	xArgXmm2M16      // arg xmm2/m16
    159 	xArgXmm2M32      // arg xmm2/m32
    160 	xArgXmm2M64      // arg xmm2/m64
    161 	xArgXmmM128      // arg xmm/m128
    162 	xArgXmmM32       // arg xmm/m32
    163 	xArgXmmM64       // arg xmm/m64
    164 	xArgRmf16        // arg r/m16 but force mod=3
    165 	xArgRmf32        // arg r/m32 but force mod=3
    166 	xArgRmf64        // arg r/m64 but force mod=3
    167 )
    168 
    169 // instPrefix returns an Inst describing just one prefix byte.
    170 // It is only used if there is a prefix followed by an unintelligible
    171 // or invalid instruction byte sequence.
    172 func instPrefix(b byte, mode int) (Inst, error) {
    173 	// When tracing it is useful to see what called instPrefix to report an error.
    174 	if trace {
    175 		_, file, line, _ := runtime.Caller(1)
    176 		fmt.Printf("%s:%d\n", file, line)
    177 	}
    178 	p := Prefix(b)
    179 	switch p {
    180 	case PrefixDataSize:
    181 		if mode == 16 {
    182 			p = PrefixData32
    183 		} else {
    184 			p = PrefixData16
    185 		}
    186 	case PrefixAddrSize:
    187 		if mode == 32 {
    188 			p = PrefixAddr16
    189 		} else {
    190 			p = PrefixAddr32
    191 		}
    192 	}
    193 	// Note: using composite literal with Prefix key confuses 'bundle' tool.
    194 	inst := Inst{Len: 1}
    195 	inst.Prefix = Prefixes{p}
    196 	return inst, nil
    197 }
    198 
    199 // truncated reports a truncated instruction.
    200 // For now we use instPrefix but perhaps later we will return
    201 // a specific error here.
    202 func truncated(src []byte, mode int) (Inst, error) {
    203 	//	return Inst{}, len(src), ErrTruncated
    204 	return instPrefix(src[0], mode) // too long
    205 }
    206 
    207 // These are the errors returned by Decode.
    208 var (
    209 	ErrInvalidMode  = errors.New("invalid x86 mode in Decode") // mode was not 16, 32, or 64
    210 	ErrTruncated    = errors.New("truncated instruction")
    211 	ErrUnrecognized = errors.New("unrecognized instruction")
    212 )
    213 
    214 // decoderCover records which entries of the decoder byte code have been
    215 // executed: decode1 allocates it on first use and sets decoderCover[pc].
    216 // TODO(rsc): This is for testing. Only use this if a flag is given.
    217 var decoderCover []bool // NOTE(review): written by decode1 with no locking visible here; confirm concurrent Decode calls are safe
    218 
    219 // Decode decodes the leading bytes in src as a single instruction.
    220 // The mode arguments specifies the assumed processor mode:
    221 // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
    222 func Decode(src []byte, mode int) (inst Inst, err error) {
    223 	return decode1(src, mode, false)
    224 }
    225 
    226 // decode1 is the implementation of Decode but takes an extra
    227 // gnuCompat flag to cause it to change its behavior to mimic
    228 // bugs (or at least unique features) of GNU libopcodes as used
    229 // by objdump. We don't believe that logic is the right thing to do
    230 // in general, but when testing against libopcodes it simplifies the
    231 // comparison if we adjust a few small pieces of logic.
    232 // The affected logic is in the conditional branch for "mandatory" prefixes,
    233 // case xCondPrefix.
    234 func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
    235 	switch mode {
    236 	case 16, 32, 64:
    237 		// ok
    238 		// TODO(rsc): 64-bit mode not tested, probably not working.
    239 	default:
    240 		return Inst{}, ErrInvalidMode
    241 	}
    242 
    243 	// Maximum instruction size is 15 bytes.
    244 	// If we need to read more, return 'truncated instruction'.
    245 	if len(src) > 15 {
    246 		src = src[:15]
    247 	}
    248 
    249 	var (
    250 		// prefix decoding information
    251 		pos           = 0    // position reading src
    252 		nprefix       = 0    // number of prefixes
    253 		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
    254 		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
    255 		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
    256 		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
    257 		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
    258 		rex           Prefix // rex byte if present (or 0)
    259 		rexUsed       Prefix // bits used in rex byte
    260 		rexIndex      = -1   // index of rex byte
    261 
    262 		addrMode = mode // address mode (width in bits)
    263 		dataMode = mode // operand mode (width in bits)
    264 
    265 		// decoded ModR/M fields
    266 		haveModrm bool
    267 		modrm     int
    268 		mod       int
    269 		regop     int
    270 		rm        int
    271 
    272 		// if ModR/M is memory reference, Mem form
    273 		mem     Mem
    274 		haveMem bool
    275 
    276 		// decoded SIB fields
    277 		haveSIB bool
    278 		sib     int
    279 		scale   int
    280 		index   int
    281 		base    int
    282 		displen int
    283 		dispoff int
    284 
    285 		// decoded immediate values
    286 		imm     int64
    287 		imm8    int8
    288 		immc    int64
    289 		immcpos int
    290 
    291 		// output
    292 		opshift int
    293 		inst    Inst
    294 		narg    int // number of arguments written to inst
    295 	)
    296 
    297 	if mode == 64 {
    298 		dataMode = 32
    299 	}
    300 
    301 	// Prefixes are certainly the most complex and underspecified part of
    302 	// decoding x86 instructions. Although the manuals say things like
    303 	// up to four prefixes, one from each group, nearly everyone seems to
    304 	// agree that in practice as many prefixes as possible, including multiple
    305 	// from a particular group or repetitions of a given prefix, can be used on
    306 	// an instruction, provided the total instruction length including prefixes
    307 	// does not exceed the agreed-upon maximum of 15 bytes.
    308 	// Everyone also agrees that if one of these prefixes is the LOCK prefix
    309 	// and the instruction is not one of the instructions that can be used with
    310 	// the LOCK prefix or if the destination is not a memory operand,
    311 	// then the instruction is invalid and produces the #UD exception.
    312 	// However, that is the end of any semblance of agreement.
    313 	//
    314 	// What happens if prefixes are given that conflict with other prefixes?
    315 	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
    316 	// conflict with each other: only one segment can be in effect.
    317 	// Disassemblers seem to agree that later prefixes take priority over
    318 	// earlier ones. I have not taken the time to write assembly programs
    319 	// to check to see if the hardware agrees.
    320 	//
    321 	// What happens if prefixes are given that have no meaning for the
    322 	// specific instruction to which they are attached? It depends.
    323 	// If they really have no meaning, they are ignored. However, a future
    324 	// processor may assign a different meaning. As a disassembler, we
    325 	// don't really know whether we're seeing a meaningless prefix or one
    326 	// whose meaning we simply haven't been told yet.
    327 	//
    328 	// Combining the two questions, what happens when conflicting
    329 	// extension prefixes are given? No one seems to know for sure.
    330 	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
    331 	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
    332 	// Which prefix wins? See the xCondPrefix case for more.
    333 	//
    334 	// Writing assembly test cases to divine which interpretation the
    335 	// CPU uses might clarify the situation, but more likely it would
    336 	// make the situation even less clear.
    337 
    338 	// Read non-REX prefixes.
    339 ReadPrefixes:
    340 	for ; pos < len(src); pos++ {
    341 		p := Prefix(src[pos])
    342 		switch p {
    343 		default:
    344 			nprefix = pos
    345 			break ReadPrefixes
    346 
    347 		// Group 1 - lock and repeat prefixes
    348 		// According to Intel, there should only be one from this set,
    349 		// but according to AMD both can be present.
    350 		case 0xF0:
    351 			if lockIndex >= 0 {
    352 				inst.Prefix[lockIndex] |= PrefixIgnored
    353 			}
    354 			lockIndex = pos
    355 		case 0xF2, 0xF3:
    356 			if repIndex >= 0 {
    357 				inst.Prefix[repIndex] |= PrefixIgnored
    358 			}
    359 			repIndex = pos
    360 
    361 		// Group 2 - segment override / branch hints
    362 		case 0x26, 0x2E, 0x36, 0x3E:
    363 			if mode == 64 {
    364 				p |= PrefixIgnored
    365 				break
    366 			}
    367 			fallthrough
    368 		case 0x64, 0x65:
    369 			if segIndex >= 0 {
    370 				inst.Prefix[segIndex] |= PrefixIgnored
    371 			}
    372 			segIndex = pos
    373 
    374 		// Group 3 - operand size override
    375 		case 0x66:
    376 			if mode == 16 {
    377 				dataMode = 32
    378 				p = PrefixData32
    379 			} else {
    380 				dataMode = 16
    381 				p = PrefixData16
    382 			}
    383 			if dataSizeIndex >= 0 {
    384 				inst.Prefix[dataSizeIndex] |= PrefixIgnored
    385 			}
    386 			dataSizeIndex = pos
    387 
    388 		// Group 4 - address size override
    389 		case 0x67:
    390 			if mode == 32 {
    391 				addrMode = 16
    392 				p = PrefixAddr16
    393 			} else {
    394 				addrMode = 32
    395 				p = PrefixAddr32
    396 			}
    397 			if addrSizeIndex >= 0 {
    398 				inst.Prefix[addrSizeIndex] |= PrefixIgnored
    399 			}
    400 			addrSizeIndex = pos
    401 		}
    402 
    403 		if pos >= len(inst.Prefix) {
    404 			return instPrefix(src[0], mode) // too long
    405 		}
    406 
    407 		inst.Prefix[pos] = p
    408 	}
    409 
    410 	// Read REX prefix.
    411 	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() {
    412 		rex = Prefix(src[pos])
    413 		rexIndex = pos
    414 		if pos >= len(inst.Prefix) {
    415 			return instPrefix(src[0], mode) // too long
    416 		}
    417 		inst.Prefix[pos] = rex
    418 		pos++
    419 		if rex&PrefixREXW != 0 {
    420 			dataMode = 64
    421 			if dataSizeIndex >= 0 {
    422 				inst.Prefix[dataSizeIndex] |= PrefixIgnored
    423 			}
    424 		}
    425 	}
    426 
    427 	// Decode instruction stream, interpreting decoding instructions.
    428 	// opshift gives the shift to use when saving the next
    429 	// opcode byte into inst.Opcode.
    430 	opshift = 24
    431 	if decoderCover == nil {
    432 		decoderCover = make([]bool, len(decoder))
    433 	}
    434 
    435 	// Decode loop, executing decoder program.
    436 	var oldPC, prevPC int
    437 Decode:
    438 	for pc := 1; ; { // TODO uint
    439 		oldPC = prevPC
    440 		prevPC = pc
    441 		if trace {
    442 			println("run", pc)
    443 		}
    444 		x := decoder[pc]
    445 		decoderCover[pc] = true
    446 		pc++
    447 
    448 		// Read and decode ModR/M if needed by opcode.
    449 		switch decodeOp(x) {
    450 		case xCondSlashR, xReadSlashR:
    451 			if haveModrm {
    452 				return Inst{Len: pos}, errInternal
    453 			}
    454 			haveModrm = true
    455 			if pos >= len(src) {
    456 				return truncated(src, mode)
    457 			}
    458 			modrm = int(src[pos])
    459 			pos++
    460 			if opshift >= 0 {
    461 				inst.Opcode |= uint32(modrm) << uint(opshift)
    462 				opshift -= 8
    463 			}
    464 			mod = modrm >> 6
    465 			regop = (modrm >> 3) & 07
    466 			rm = modrm & 07
    467 			if rex&PrefixREXR != 0 {
    468 				rexUsed |= PrefixREXR
    469 				regop |= 8
    470 			}
    471 			if addrMode == 16 {
    472 				// 16-bit modrm form
    473 				if mod != 3 {
    474 					haveMem = true
    475 					mem = addr16[rm]
    476 					if rm == 6 && mod == 0 {
    477 						mem.Base = 0
    478 					}
    479 
    480 					// Consume disp16 if present.
    481 					if mod == 0 && rm == 6 || mod == 2 {
    482 						if pos+2 > len(src) {
    483 							return truncated(src, mode)
    484 						}
    485 						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
    486 						pos += 2
    487 					}
    488 
    489 					// Consume disp8 if present.
    490 					if mod == 1 {
    491 						if pos >= len(src) {
    492 							return truncated(src, mode)
    493 						}
    494 						mem.Disp = int64(int8(src[pos]))
    495 						pos++
    496 					}
    497 				}
    498 			} else {
    499 				haveMem = mod != 3
    500 
    501 				// 32-bit or 64-bit form
    502 				// Consume SIB encoding if present.
    503 				if rm == 4 && mod != 3 {
    504 					haveSIB = true
    505 					if pos >= len(src) {
    506 						return truncated(src, mode)
    507 					}
    508 					sib = int(src[pos])
    509 					pos++
    510 					if opshift >= 0 {
    511 						inst.Opcode |= uint32(sib) << uint(opshift)
    512 						opshift -= 8
    513 					}
    514 					scale = sib >> 6
    515 					index = (sib >> 3) & 07
    516 					base = sib & 07
    517 					if rex&PrefixREXB != 0 {
    518 						rexUsed |= PrefixREXB
    519 						base |= 8
    520 					}
    521 					if rex&PrefixREXX != 0 {
    522 						rexUsed |= PrefixREXX
    523 						index |= 8
    524 					}
    525 
    526 					mem.Scale = 1 << uint(scale)
    527 					if index == 4 {
    528 						// no mem.Index
    529 					} else {
    530 						mem.Index = baseRegForBits(addrMode) + Reg(index)
    531 					}
    532 					if base&7 == 5 && mod == 0 {
    533 						// no mem.Base
    534 					} else {
    535 						mem.Base = baseRegForBits(addrMode) + Reg(base)
    536 					}
    537 				} else {
    538 					if rex&PrefixREXB != 0 {
    539 						rexUsed |= PrefixREXB
    540 						rm |= 8
    541 					}
    542 					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
    543 						// base omitted
    544 					} else if mod != 3 {
    545 						mem.Base = baseRegForBits(addrMode) + Reg(rm)
    546 					}
    547 				}
    548 
    549 				// Consume disp32 if present.
    550 				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
    551 					if pos+4 > len(src) {
    552 						return truncated(src, mode)
    553 					}
    554 					dispoff = pos
    555 					displen = 4
    556 					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
    557 					pos += 4
    558 				}
    559 
    560 				// Consume disp8 if present.
    561 				if mod == 1 {
    562 					if pos >= len(src) {
    563 						return truncated(src, mode)
    564 					}
    565 					dispoff = pos
    566 					displen = 1
    567 					mem.Disp = int64(int8(src[pos]))
    568 					pos++
    569 				}
    570 
    571 				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
    572 				// See Vol 2A. Table 2-7.
    573 				if mode == 64 && mod == 0 && rm&7 == 5 {
    574 					if addrMode == 32 {
    575 						mem.Base = EIP
    576 					} else {
    577 						mem.Base = RIP
    578 					}
    579 				}
    580 			}
    581 
    582 			if segIndex >= 0 {
    583 				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
    584 			}
    585 		}
    586 
    587 		// Execute single opcode.
    588 		switch decodeOp(x) {
    589 		default:
    590 			println("bad op", x, "at", pc-1, "from", oldPC)
    591 			return Inst{Len: pos}, errInternal
    592 
    593 		case xFail:
    594 			inst.Op = 0
    595 			break Decode
    596 
    597 		case xMatch:
    598 			break Decode
    599 
    600 		case xJump:
    601 			pc = int(decoder[pc])
    602 
    603 		// Conditional branches.
    604 
    605 		case xCondByte:
    606 			if pos >= len(src) {
    607 				return truncated(src, mode)
    608 			}
    609 			b := src[pos]
    610 			n := int(decoder[pc])
    611 			pc++
    612 			for i := 0; i < n; i++ {
    613 				xb, xpc := decoder[pc], int(decoder[pc+1])
    614 				pc += 2
    615 				if b == byte(xb) {
    616 					pc = xpc
    617 					pos++
    618 					if opshift >= 0 {
    619 						inst.Opcode |= uint32(b) << uint(opshift)
    620 						opshift -= 8
    621 					}
    622 					continue Decode
    623 				}
    624 			}
    625 			// xCondByte is the only conditional with a fall through,
    626 			// so that it can be used to pick off special cases before
    627 			// an xCondSlash. If the fallthrough instruction is xFail,
    628 			// advance the position so that the decoded instruction
    629 			// size includes the byte we just compared against.
    630 			if decodeOp(decoder[pc]) == xJump {
    631 				pc = int(decoder[pc+1])
    632 			}
    633 			if decodeOp(decoder[pc]) == xFail {
    634 				pos++
    635 			}
    636 
    637 		case xCondIs64:
    638 			if mode == 64 {
    639 				pc = int(decoder[pc+1])
    640 			} else {
    641 				pc = int(decoder[pc])
    642 			}
    643 
    644 		case xCondIsMem:
    645 			mem := haveMem
    646 			if !haveModrm {
    647 				if pos >= len(src) {
    648 					return instPrefix(src[0], mode) // too long
    649 				}
    650 				mem = src[pos]>>6 != 3
    651 			}
    652 			if mem {
    653 				pc = int(decoder[pc+1])
    654 			} else {
    655 				pc = int(decoder[pc])
    656 			}
    657 
    658 		case xCondDataSize:
    659 			switch dataMode {
    660 			case 16:
    661 				if dataSizeIndex >= 0 {
    662 					inst.Prefix[dataSizeIndex] |= PrefixImplicit
    663 				}
    664 				pc = int(decoder[pc])
    665 			case 32:
    666 				if dataSizeIndex >= 0 {
    667 					inst.Prefix[dataSizeIndex] |= PrefixImplicit
    668 				}
    669 				pc = int(decoder[pc+1])
    670 			case 64:
    671 				rexUsed |= PrefixREXW
    672 				pc = int(decoder[pc+2])
    673 			}
    674 
    675 		case xCondAddrSize:
    676 			switch addrMode {
    677 			case 16:
    678 				if addrSizeIndex >= 0 {
    679 					inst.Prefix[addrSizeIndex] |= PrefixImplicit
    680 				}
    681 				pc = int(decoder[pc])
    682 			case 32:
    683 				if addrSizeIndex >= 0 {
    684 					inst.Prefix[addrSizeIndex] |= PrefixImplicit
    685 				}
    686 				pc = int(decoder[pc+1])
    687 			case 64:
    688 				pc = int(decoder[pc+2])
    689 			}
    690 
    691 		case xCondPrefix:
    692 			// Conditional branch based on presence or absence of prefixes.
    693 			// The conflict cases here are completely undocumented and
    694 			// differ significantly between GNU libopcodes and Intel xed.
    695 			// I have not written assembly code to divine what various CPUs
    696 			// do, but it wouldn't surprise me if they are not consistent either.
    697 			//
    698 			// The basic idea is to switch on the presence of a prefix, so that
    699 			// for example:
    700 			//
    701 			//	xCondPrefix, 4
    702 			//	0xF3, 123,
    703 			//	0xF2, 234,
    704 			//	0x66, 345,
    705 			//	0, 456
    706 			//
    707 			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
    708 			// is present, 345 if the 66 prefix is present, and 456 otherwise.
    709 			// The prefixes are given in descending order so that the 0 will be last.
    710 			//
    711 			// It is unclear what should happen if multiple conditions are
    712 			// satisfied: what if F2 and F3 are both present, or if 66 and F2
    713 			// are present, or if all three are present? The one chosen becomes
    714 			// part of the opcode and the others do not. Perhaps the answer
    715 			// depends on the specific opcodes in question.
    716 			//
    717 			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
    718 			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
    719 			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
    720 			// with the 66 being only an operand size override, and probably
    721 			// F2 66 0F 38 F1 /r should be treated the same.
    722 			// Perhaps that rule is specific to the case of CRC32, since no
    723 			// 66 0F 38 F1 instruction is defined (today) (that we know of).
    724 			// However, both libopcodes and xed seem to generalize this
    725 			// example and choose F2/F3 in preference to 66, and we
    726 			// do the same.
    727 			//
    728 			// Next, what if both F2 and F3 are present? Which wins?
    729 			// The Intel xed rule, and ours, is that the one that occurs last wins.
    730 			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
    731 			// is that F3 beats F2 unless F3 has no special meaning, in which
    732 			// case F3 can be a modifier on an F2 special meaning.
    733 			//
    734 			// Concretely,
    735 			//	66 0F D6 /r is MOVQ
    736 			//	F2 0F D6 /r is MOVDQ2Q
    737 			//	F3 0F D6 /r is MOVQ2DQ.
    738 			//
    739 			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
    740 			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
    741 			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
    742 			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
    743 			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
    744 			//	F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
    745 			//	Adding 66 anywhere in the prefix section of the
    746 			//	last two cases does not change the outcome.
    747 			//
    748 			// Finally, what if there is a variant in which 66 is a mandatory
    749 			// prefix rather than an operand size override, but we know of
    750 			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
    751 			// Does F2/F3 still take priority, so that the result is an unknown
    752 			// instruction, or does the 66 take priority, so that the extended
    753 			// 66 instruction should be interpreted as having a REP/REPN prefix?
    754 			// Intel xed does the former and GNU libopcodes does the latter.
    755 			// We side with Intel xed, unless we are trying to match libopcodes
    756 			// more closely during the comparison-based test suite.
    757 			//
    758 			// In 64-bit mode REX.W is another valid prefix to test for, but
    759 			// there is less ambiguity about that. When present, REX.W is
    760 			// always the first entry in the table.
    761 			n := int(decoder[pc])
    762 			pc++
    763 			sawF3 := false
    764 			for j := 0; j < n; j++ {
    765 				prefix := Prefix(decoder[pc+2*j])
    766 				if prefix.IsREX() {
    767 					rexUsed |= prefix
    768 					if rex&prefix == prefix {
    769 						pc = int(decoder[pc+2*j+1])
    770 						continue Decode
    771 					}
    772 					continue
    773 				}
    774 				ok := false
    775 				if prefix == 0 {
    776 					ok = true
    777 				} else if prefix.IsREX() {
    778 					rexUsed |= prefix
    779 					if rex&prefix == prefix {
    780 						ok = true
    781 					}
    782 				} else {
    783 					if prefix == 0xF3 {
    784 						sawF3 = true
    785 					}
    786 					switch prefix {
    787 					case PrefixLOCK:
    788 						if lockIndex >= 0 {
    789 							inst.Prefix[lockIndex] |= PrefixImplicit
    790 							ok = true
    791 						}
    792 					case PrefixREP, PrefixREPN:
    793 						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
    794 							inst.Prefix[repIndex] |= PrefixImplicit
    795 							ok = true
    796 						}
    797 						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
    798 							// Check to see if earlier prefix F3 is present.
    799 							for i := repIndex - 1; i >= 0; i-- {
    800 								if inst.Prefix[i]&0xFF == prefix {
    801 									inst.Prefix[i] |= PrefixImplicit
    802 									ok = true
    803 								}
    804 							}
    805 						}
    806 						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
    807 							// Check to see if earlier prefix F2 is present.
    808 							for i := repIndex - 1; i >= 0; i-- {
    809 								if inst.Prefix[i]&0xFF == prefix {
    810 									inst.Prefix[i] |= PrefixImplicit
    811 									ok = true
    812 								}
    813 							}
    814 						}
    815 					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
    816 						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
    817 							inst.Prefix[segIndex] |= PrefixImplicit
    818 							ok = true
    819 						}
    820 					case PrefixDataSize:
    821 						// Looking for 66 mandatory prefix.
    822 						// The F2/F3 mandatory prefixes take priority when both are present.
    823 						// If we got this far in the xCondPrefix table and an F2/F3 is present,
    824 						// it means the table didn't have any entry for that prefix. But if 66 has
    825 						// special meaning, perhaps F2/F3 have special meaning that we don't know.
    826 						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
    827 						// GNU libopcodes allows the 66 to match. We do what Intel xed does
    828 						// except in gnuCompat mode.
    829 						if repIndex >= 0 && !gnuCompat {
    830 							inst.Op = 0
    831 							break Decode
    832 						}
    833 						if dataSizeIndex >= 0 {
    834 							inst.Prefix[dataSizeIndex] |= PrefixImplicit
    835 							ok = true
    836 						}
    837 					case PrefixAddrSize:
    838 						if addrSizeIndex >= 0 {
    839 							inst.Prefix[addrSizeIndex] |= PrefixImplicit
    840 							ok = true
    841 						}
    842 					}
    843 				}
    844 				if ok {
    845 					pc = int(decoder[pc+2*j+1])
    846 					continue Decode
    847 				}
    848 			}
    849 			inst.Op = 0
    850 			break Decode
    851 
    852 		case xCondSlashR:
    853 			pc = int(decoder[pc+regop&7])
    854 
    855 		// Input.
    856 
    857 		case xReadSlashR:
    858 			// done above
    859 
    860 		case xReadIb:
    861 			if pos >= len(src) {
    862 				return truncated(src, mode)
    863 			}
    864 			imm8 = int8(src[pos])
    865 			pos++
    866 
    867 		case xReadIw:
    868 			if pos+2 > len(src) {
    869 				return truncated(src, mode)
    870 			}
    871 			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
    872 			pos += 2
    873 
    874 		case xReadId:
    875 			if pos+4 > len(src) {
    876 				return truncated(src, mode)
    877 			}
    878 			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
    879 			pos += 4
    880 
    881 		case xReadIo:
    882 			if pos+8 > len(src) {
    883 				return truncated(src, mode)
    884 			}
    885 			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
    886 			pos += 8
    887 
    888 		case xReadCb:
    889 			if pos >= len(src) {
    890 				return truncated(src, mode)
    891 			}
    892 			immcpos = pos
    893 			immc = int64(src[pos])
    894 			pos++
    895 
    896 		case xReadCw:
    897 			if pos+2 > len(src) {
    898 				return truncated(src, mode)
    899 			}
    900 			immcpos = pos
    901 			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
    902 			pos += 2
    903 
    904 		case xReadCm:
    905 			immcpos = pos
    906 			if addrMode == 16 {
    907 				if pos+2 > len(src) {
    908 					return truncated(src, mode)
    909 				}
    910 				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
    911 				pos += 2
    912 			} else if addrMode == 32 {
    913 				if pos+4 > len(src) {
    914 					return truncated(src, mode)
    915 				}
    916 				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
    917 				pos += 4
    918 			} else {
    919 				if pos+8 > len(src) {
    920 					return truncated(src, mode)
    921 				}
    922 				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
    923 				pos += 8
    924 			}
    925 		case xReadCd:
    926 			immcpos = pos
    927 			if pos+4 > len(src) {
    928 				return truncated(src, mode)
    929 			}
    930 			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
    931 			pos += 4
    932 
    933 		case xReadCp:
    934 			immcpos = pos
    935 			if pos+6 > len(src) {
    936 				return truncated(src, mode)
    937 			}
    938 			w := binary.LittleEndian.Uint32(src[pos:])
    939 			w2 := binary.LittleEndian.Uint16(src[pos+4:])
    940 			immc = int64(w2)<<32 | int64(w)
    941 			pos += 6
    942 
    943 		// Output.
    944 
    945 		case xSetOp:
    946 			inst.Op = Op(decoder[pc])
    947 			pc++
    948 
    949 		case xArg1,
    950 			xArg3,
    951 			xArgAL,
    952 			xArgAX,
    953 			xArgCL,
    954 			xArgCS,
    955 			xArgDS,
    956 			xArgDX,
    957 			xArgEAX,
    958 			xArgEDX,
    959 			xArgES,
    960 			xArgFS,
    961 			xArgGS,
    962 			xArgRAX,
    963 			xArgRDX,
    964 			xArgSS,
    965 			xArgST,
    966 			xArgXMM0:
    967 			inst.Args[narg] = fixedArg[x]
    968 			narg++
    969 
    970 		case xArgImm8:
    971 			inst.Args[narg] = Imm(imm8)
    972 			narg++
    973 
    974 		case xArgImm8u:
    975 			inst.Args[narg] = Imm(uint8(imm8))
    976 			narg++
    977 
    978 		case xArgImm16:
    979 			inst.Args[narg] = Imm(int16(imm))
    980 			narg++
    981 
    982 		case xArgImm16u:
    983 			inst.Args[narg] = Imm(uint16(imm))
    984 			narg++
    985 
    986 		case xArgImm32:
    987 			inst.Args[narg] = Imm(int32(imm))
    988 			narg++
    989 
    990 		case xArgImm64:
    991 			inst.Args[narg] = Imm(imm)
    992 			narg++
    993 
    994 		case xArgM,
    995 			xArgM128,
    996 			xArgM1428byte,
    997 			xArgM16,
    998 			xArgM16and16,
    999 			xArgM16and32,
   1000 			xArgM16and64,
   1001 			xArgM16colon16,
   1002 			xArgM16colon32,
   1003 			xArgM16colon64,
   1004 			xArgM16int,
   1005 			xArgM2byte,
   1006 			xArgM32,
   1007 			xArgM32and32,
   1008 			xArgM32fp,
   1009 			xArgM32int,
   1010 			xArgM512byte,
   1011 			xArgM64,
   1012 			xArgM64fp,
   1013 			xArgM64int,
   1014 			xArgM8,
   1015 			xArgM80bcd,
   1016 			xArgM80dec,
   1017 			xArgM80fp,
   1018 			xArgM94108byte,
   1019 			xArgMem:
   1020 			if !haveMem {
   1021 				inst.Op = 0
   1022 				break Decode
   1023 			}
   1024 			inst.Args[narg] = mem
   1025 			inst.MemBytes = int(memBytes[decodeOp(x)])
   1026 			if mem.Base == RIP {
   1027 				inst.PCRel = displen
   1028 				inst.PCRelOff = dispoff
   1029 			}
   1030 			narg++
   1031 
   1032 		case xArgPtr16colon16:
   1033 			inst.Args[narg] = Imm(immc >> 16)
   1034 			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
   1035 			narg += 2
   1036 
   1037 		case xArgPtr16colon32:
   1038 			inst.Args[narg] = Imm(immc >> 32)
   1039 			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
   1040 			narg += 2
   1041 
   1042 		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
   1043 			// TODO(rsc): Can address be 64 bits?
   1044 			mem = Mem{Disp: immc}
   1045 			if segIndex >= 0 {
   1046 				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
   1047 				inst.Prefix[segIndex] |= PrefixImplicit
   1048 			}
   1049 			inst.Args[narg] = mem
   1050 			inst.MemBytes = int(memBytes[decodeOp(x)])
   1051 			if mem.Base == RIP {
   1052 				inst.PCRel = displen
   1053 				inst.PCRelOff = dispoff
   1054 			}
   1055 			narg++
   1056 
   1057 		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
   1058 			base := baseReg[x]
   1059 			index := Reg(regop)
   1060 			if rex != 0 && base == AL && index >= 4 {
   1061 				rexUsed |= PrefixREX
   1062 				index -= 4
   1063 				base = SPB
   1064 			}
   1065 			inst.Args[narg] = base + index
   1066 			narg++
   1067 
   1068 		case xArgMm, xArgMm1, xArgTR0dashTR7:
   1069 			inst.Args[narg] = baseReg[x] + Reg(regop&7)
   1070 			narg++
   1071 
   1072 		case xArgCR0dashCR7:
   1073 			// AMD documents an extension that the LOCK prefix
   1074 			// can be used in place of a REX prefix in order to access
   1075 			// CR8 from 32-bit mode. The LOCK prefix is allowed in
   1076 			// all modes, provided the corresponding CPUID bit is set.
   1077 			if lockIndex >= 0 {
   1078 				inst.Prefix[lockIndex] |= PrefixImplicit
   1079 				regop += 8
   1080 			}
   1081 			inst.Args[narg] = CR0 + Reg(regop)
   1082 			narg++
   1083 
   1084 		case xArgSreg:
   1085 			regop &= 7
   1086 			if regop >= 6 {
   1087 				inst.Op = 0
   1088 				break Decode
   1089 			}
   1090 			inst.Args[narg] = ES + Reg(regop)
   1091 			narg++
   1092 
   1093 		case xArgRmf16, xArgRmf32, xArgRmf64:
   1094 			base := baseReg[x]
   1095 			index := Reg(modrm & 07)
   1096 			if rex&PrefixREXB != 0 {
   1097 				rexUsed |= PrefixREXB
   1098 				index += 8
   1099 			}
   1100 			inst.Args[narg] = base + index
   1101 			narg++
   1102 
   1103 		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
   1104 			n := inst.Opcode >> uint(opshift+8) & 07
   1105 			base := baseReg[x]
   1106 			index := Reg(n)
   1107 			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
   1108 				rexUsed |= PrefixREXB
   1109 				index += 8
   1110 			}
   1111 			if rex != 0 && base == AL && index >= 4 {
   1112 				rexUsed |= PrefixREX
   1113 				index -= 4
   1114 				base = SPB
   1115 			}
   1116 			inst.Args[narg] = base + index
   1117 			narg++
   1118 
   1119 		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
   1120 			xArgMmM32, xArgMmM64, xArgMm2M64,
   1121 			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128:
   1122 			if haveMem {
   1123 				inst.Args[narg] = mem
   1124 				inst.MemBytes = int(memBytes[decodeOp(x)])
   1125 				if mem.Base == RIP {
   1126 					inst.PCRel = displen
   1127 					inst.PCRelOff = dispoff
   1128 				}
   1129 			} else {
   1130 				base := baseReg[x]
   1131 				index := Reg(rm)
   1132 				switch decodeOp(x) {
   1133 				case xArgMmM32, xArgMmM64, xArgMm2M64:
   1134 					// There are only 8 MMX registers, so these ignore the REX.X bit.
   1135 					index &= 7
   1136 				case xArgRM8:
   1137 					if rex != 0 && index >= 4 {
   1138 						rexUsed |= PrefixREX
   1139 						index -= 4
   1140 						base = SPB
   1141 					}
   1142 				}
   1143 				inst.Args[narg] = base + index
   1144 			}
   1145 			narg++
   1146 
   1147 		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
   1148 			if haveMem {
   1149 				inst.Op = 0
   1150 				break Decode
   1151 			}
   1152 			inst.Args[narg] = baseReg[x] + Reg(rm&7)
   1153 			narg++
   1154 
   1155 		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
   1156 			if haveMem {
   1157 				inst.Op = 0
   1158 				break Decode
   1159 			}
   1160 			inst.Args[narg] = baseReg[x] + Reg(rm)
   1161 			narg++
   1162 
   1163 		case xArgRel8:
   1164 			inst.PCRelOff = immcpos
   1165 			inst.PCRel = 1
   1166 			inst.Args[narg] = Rel(int8(immc))
   1167 			narg++
   1168 
   1169 		case xArgRel16:
   1170 			inst.PCRelOff = immcpos
   1171 			inst.PCRel = 2
   1172 			inst.Args[narg] = Rel(int16(immc))
   1173 			narg++
   1174 
   1175 		case xArgRel32:
   1176 			inst.PCRelOff = immcpos
   1177 			inst.PCRel = 4
   1178 			inst.Args[narg] = Rel(int32(immc))
   1179 			narg++
   1180 		}
   1181 	}
   1182 
   1183 	if inst.Op == 0 {
   1184 		// Invalid instruction.
   1185 		if nprefix > 0 {
   1186 			return instPrefix(src[0], mode) // invalid instruction
   1187 		}
   1188 		return Inst{Len: pos}, ErrUnrecognized
   1189 	}
   1190 
   1191 	// Matched! Hooray!
   1192 
   1193 	// 90 decodes as XCHG EAX, EAX but is NOP.
   1194 	// 66 90 decodes as XCHG AX, AX and is NOP too.
   1195 	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
   1196 	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
   1197 	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
   1198 	// It's all too special to handle in the decoding tables, at least for now.
   1199 	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
   1200 		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
   1201 			inst.Op = NOP
   1202 			if dataSizeIndex >= 0 {
   1203 				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
   1204 			}
   1205 			inst.Args[0] = nil
   1206 			inst.Args[1] = nil
   1207 		}
   1208 		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
   1209 			inst.Prefix[repIndex] |= PrefixImplicit
   1210 			inst.Op = PAUSE
   1211 			inst.Args[0] = nil
   1212 			inst.Args[1] = nil
   1213 		} else if gnuCompat {
   1214 			for i := nprefix - 1; i >= 0; i-- {
   1215 				if inst.Prefix[i]&0xFF == 0xF3 {
   1216 					inst.Prefix[i] |= PrefixImplicit
   1217 					inst.Op = PAUSE
   1218 					inst.Args[0] = nil
   1219 					inst.Args[1] = nil
   1220 					break
   1221 				}
   1222 			}
   1223 		}
   1224 	}
   1225 
   1226 	// defaultSeg returns the default segment for an implicit
   1227 	// memory reference: the final override if present, or else DS.
   1228 	defaultSeg := func() Reg {
   1229 		if segIndex >= 0 {
   1230 			inst.Prefix[segIndex] |= PrefixImplicit
   1231 			return prefixToSegment(inst.Prefix[segIndex])
   1232 		}
   1233 		return DS
   1234 	}
   1235 
   1236 	// Add implicit arguments not present in the tables.
   1237 	// Normally we shy away from making implicit arguments explicit,
   1238 	// following the Intel manuals, but adding the arguments seems
   1239 	// the best way to express the effect of the segment override prefixes.
   1240 	// TODO(rsc): Perhaps add these to the tables and
   1241 	// create bytecode instructions for them.
   1242 	usedAddrSize := false
   1243 	switch inst.Op {
   1244 	case INSB, INSW, INSD:
   1245 		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
   1246 		inst.Args[1] = DX
   1247 		usedAddrSize = true
   1248 
   1249 	case OUTSB, OUTSW, OUTSD:
   1250 		inst.Args[0] = DX
   1251 		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
   1252 		usedAddrSize = true
   1253 
   1254 	case MOVSB, MOVSW, MOVSD, MOVSQ:
   1255 		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
   1256 		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
   1257 		usedAddrSize = true
   1258 
   1259 	case CMPSB, CMPSW, CMPSD, CMPSQ:
   1260 		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
   1261 		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
   1262 		usedAddrSize = true
   1263 
   1264 	case LODSB, LODSW, LODSD, LODSQ:
   1265 		switch inst.Op {
   1266 		case LODSB:
   1267 			inst.Args[0] = AL
   1268 		case LODSW:
   1269 			inst.Args[0] = AX
   1270 		case LODSD:
   1271 			inst.Args[0] = EAX
   1272 		case LODSQ:
   1273 			inst.Args[0] = RAX
   1274 		}
   1275 		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
   1276 		usedAddrSize = true
   1277 
   1278 	case STOSB, STOSW, STOSD, STOSQ:
   1279 		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
   1280 		switch inst.Op {
   1281 		case STOSB:
   1282 			inst.Args[1] = AL
   1283 		case STOSW:
   1284 			inst.Args[1] = AX
   1285 		case STOSD:
   1286 			inst.Args[1] = EAX
   1287 		case STOSQ:
   1288 			inst.Args[1] = RAX
   1289 		}
   1290 		usedAddrSize = true
   1291 
   1292 	case SCASB, SCASW, SCASD, SCASQ:
   1293 		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
   1294 		switch inst.Op {
   1295 		case SCASB:
   1296 			inst.Args[0] = AL
   1297 		case SCASW:
   1298 			inst.Args[0] = AX
   1299 		case SCASD:
   1300 			inst.Args[0] = EAX
   1301 		case SCASQ:
   1302 			inst.Args[0] = RAX
   1303 		}
   1304 		usedAddrSize = true
   1305 
   1306 	case XLATB:
   1307 		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
   1308 		usedAddrSize = true
   1309 	}
   1310 
   1311 	// If we used the address size annotation to construct the
   1312 	// argument list, mark that prefix as implicit: it doesn't need
   1313 	// to be shown when printing the instruction.
   1314 	if haveMem || usedAddrSize {
   1315 		if addrSizeIndex >= 0 {
   1316 			inst.Prefix[addrSizeIndex] |= PrefixImplicit
   1317 		}
   1318 	}
   1319 
   1320 	// Similarly, if there's some memory operand, the segment
   1321 	// will be shown there and doesn't need to be shown as an
   1322 	// explicit prefix.
   1323 	if haveMem {
   1324 		if segIndex >= 0 {
   1325 			inst.Prefix[segIndex] |= PrefixImplicit
   1326 		}
   1327 	}
   1328 
   1329 	// Branch predict prefixes are overloaded segment prefixes,
   1330 	// since segment prefixes don't make sense on conditional jumps.
   1331 	// Rewrite final instance to prediction prefix.
   1332 	// The set of instructions to which the prefixes apply (other than the
   1333 	// Jcc conditional jumps) is not 100% clear from the manuals, but
   1334 	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
   1335 	// so we'll follow along.
   1336 	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
   1337 	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
   1338 	PredictLoop:
   1339 		for i := nprefix - 1; i >= 0; i-- {
   1340 			p := inst.Prefix[i]
   1341 			switch p & 0xFF {
   1342 			case PrefixCS:
   1343 				inst.Prefix[i] = PrefixPN
   1344 				break PredictLoop
   1345 			case PrefixDS:
   1346 				inst.Prefix[i] = PrefixPT
   1347 				break PredictLoop
   1348 			}
   1349 		}
   1350 	}
   1351 
   1352 	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
   1353 	// A REPN applied to certain control transfers is a BND prefix to bound
   1354 	// the range of possible destinations. There's surprisingly little documentation
   1355 	// about this, so we just do what libopcodes and xed agree on.
   1356 	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
   1357 	// does not turn into a BND.
   1358 	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
   1359 	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
   1360 		for i := nprefix - 1; i >= 0; i-- {
   1361 			p := inst.Prefix[i]
   1362 			if p&^PrefixIgnored == PrefixREPN {
   1363 				inst.Prefix[i] = PrefixBND
   1364 				break
   1365 			}
   1366 		}
   1367 	}
   1368 
   1369 	// The LOCK prefix only applies to certain instructions, and then only
   1370 	// to instances of the instruction with a memory destination.
   1371 	// Other uses of LOCK are invalid and cause a processor exception,
   1372 	// in contrast to the "just ignore it" spirit applied to all other prefixes.
   1373 	// Mark invalid lock prefixes.
   1374 	hasLock := false
   1375 	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
   1376 		switch inst.Op {
   1377 		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
   1378 		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
   1379 			if isMem(inst.Args[0]) {
   1380 				hasLock = true
   1381 				break
   1382 			}
   1383 			fallthrough
   1384 		default:
   1385 			inst.Prefix[lockIndex] |= PrefixInvalid
   1386 		}
   1387 	}
   1388 
   1389 	// In certain cases, all of which require a memory destination,
   1390 	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
   1391 	// from the Intel Transactional Synchronization Extensions (TSX).
   1392 	//
   1393 	// The specific rules are:
   1394 	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
   1395 	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
   1396 	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
   1397 	if isMem(inst.Args[0]) {
   1398 		if inst.Op == XCHG {
   1399 			hasLock = true
   1400 		}
   1401 
   1402 		for i := len(inst.Prefix) - 1; i >= 0; i-- {
   1403 			p := inst.Prefix[i] &^ PrefixIgnored
   1404 			switch p {
   1405 			case PrefixREPN:
   1406 				if hasLock {
   1407 					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
   1408 				}
   1409 
   1410 			case PrefixREP:
   1411 				if hasLock {
   1412 					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
   1413 				}
   1414 
   1415 				if inst.Op == MOV {
   1416 					op := (inst.Opcode >> 24) &^ 1
   1417 					if op == 0x88 || op == 0xC6 {
   1418 						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
   1419 					}
   1420 				}
   1421 			}
   1422 		}
   1423 	}
   1424 
   1425 	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
   1426 	if repIndex >= 0 {
   1427 		switch inst.Prefix[repIndex] {
   1428 		case PrefixREP, PrefixREPN:
   1429 			switch inst.Op {
   1430 			// According to the manuals, the REP/REPE prefix applies to all of these,
   1431 			// while the REPN applies only to some of them. However, both libopcodes
   1432 			// and xed show both prefixes explicitly for all instructions, so we do the same.
   1433 			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
   1434 			case INSB, INSW, INSD,
   1435 				MOVSB, MOVSW, MOVSD, MOVSQ,
   1436 				OUTSB, OUTSW, OUTSD,
   1437 				LODSB, LODSW, LODSD, LODSQ,
   1438 				CMPSB, CMPSW, CMPSD, CMPSQ,
   1439 				SCASB, SCASW, SCASD, SCASQ,
   1440 				STOSB, STOSW, STOSD, STOSQ:
   1441 				// ok
   1442 			default:
   1443 				inst.Prefix[repIndex] |= PrefixIgnored
   1444 			}
   1445 		}
   1446 	}
   1447 
   1448 	// If REX was present, mark implicit if all the 1 bits were consumed.
   1449 	if rexIndex >= 0 {
   1450 		if rexUsed != 0 {
   1451 			rexUsed |= PrefixREX
   1452 		}
   1453 		if rex&^rexUsed == 0 {
   1454 			inst.Prefix[rexIndex] |= PrefixImplicit
   1455 		}
   1456 	}
   1457 
   1458 	inst.DataSize = dataMode
   1459 	inst.AddrSize = addrMode
   1460 	inst.Mode = mode
   1461 	inst.Len = pos
   1462 	return inst, nil
   1463 }
   1464 
   1465 var errInternal = errors.New("internal error") // package-level sentinel error value carrying the message "internal error"
   1466 
   1467 // addr16 records the eight 16-bit addressing modes as Mem templates holding each mode's base/index register pair. NOTE(review): presumably indexed by the 3-bit ModRM r/m field; the lookup is not in this part of the file — confirm.
   1468 var addr16 = [8]Mem{
   1469 	{Base: BX, Scale: 1, Index: SI}, // BX+SI
   1470 	{Base: BX, Scale: 1, Index: DI}, // BX+DI
   1471 	{Base: BP, Scale: 1, Index: SI}, // BP+SI
   1472 	{Base: BP, Scale: 1, Index: DI}, // BP+DI
   1473 	{Base: SI},                      // SI alone
   1474 	{Base: DI},                      // DI alone
   1475 	{Base: BP},                      // BP alone
   1476 	{Base: BX},                      // BX alone
   1477 }
   1478 
   1479 // baseRegForBits returns the accumulator register (AL, AX, EAX, or RAX) for a register size of 8, 16, 32, or 64 bits, and 0 for any other size; callers in this file offset the result (e.g. + DI - AX) to select another register.
   1480 func baseRegForBits(bits int) Reg {
   1481 	switch bits {
   1482 	case 8:
   1483 		return AL
   1484 	case 16:
   1485 		return AX
   1486 	case 32:
   1487 		return EAX
   1488 	case 64:
   1489 		return RAX
   1490 	}
   1491 	return 0 // bits is not one of 8, 16, 32, 64
   1492 }
   1493 
   1494 // baseReg records, for each argument type that specifies a range of registers, the first register of that range.
   1495 // The decoder adds an index taken from the opcode, regop, or rm field to this base to form the actual register argument.
   1496 var baseReg = [...]Reg{
   1497 	xArgDR0dashDR7: DR0,
   1498 	xArgMm1:        M0,
   1499 	xArgMm2:        M0,
   1500 	xArgMm2M64:     M0,
   1501 	xArgMm:         M0,
   1502 	xArgMmM32:      M0,
   1503 	xArgMmM64:      M0,
   1504 	xArgR16:        AX,
   1505 	xArgR16op:      AX,
   1506 	xArgR32:        EAX,
   1507 	xArgR32M16:     EAX,
   1508 	xArgR32M8:      EAX,
   1509 	xArgR32op:      EAX,
   1510 	xArgR64:        RAX,
   1511 	xArgR64M16:     RAX,
   1512 	xArgR64op:      RAX,
   1513 	xArgR8:         AL,
   1514 	xArgR8op:       AL,
   1515 	xArgRM16:       AX,
   1516 	xArgRM32:       EAX,
   1517 	xArgRM64:       RAX,
   1518 	xArgRM8:        AL,
   1519 	xArgRmf16:      AX,
   1520 	xArgRmf32:      EAX,
   1521 	xArgRmf64:      RAX,
   1522 	xArgSTi:        F0,
   1523 	xArgTR0dashTR7: TR0,
   1524 	xArgXmm1:       X0,
   1525 	xArgXmm2:       X0,
   1526 	xArgXmm2M128:   X0,
   1527 	xArgXmm2M16:    X0,
   1528 	xArgXmm2M32:    X0,
   1529 	xArgXmm2M64:    X0,
   1530 	xArgXmm:        X0,
   1531 	xArgXmmM128:    X0,
   1532 	xArgXmmM32:     X0,
   1533 	xArgXmmM64:     X0,
   1534 }
   1535 
   1536 // prefixToSegment returns the segment register corresponding to a particular segment prefix,
   1537 // ignoring the PrefixImplicit flag. It returns 0 if p is not one of the six segment prefixes.
   1538 func prefixToSegment(p Prefix) Reg {
   1539 	switch p &^ PrefixImplicit { // only PrefixImplicit is cleared; any other flag bit set on p prevents a match
   1540 	case PrefixCS:
   1541 		return CS
   1542 	case PrefixDS:
   1543 		return DS
   1544 	case PrefixES:
   1545 		return ES
   1546 	case PrefixFS:
   1547 		return FS
   1548 	case PrefixGS:
   1549 		return GS
   1550 	case PrefixSS:
   1551 		return SS
   1552 	}
   1553 	return 0 // not a segment prefix
   1554 }
   1555 
   1556 // fixedArg records, for each bytecode that denotes one specific operand, the constant argument (a register or an immediate) that the decoder stores for it.
   1557 var fixedArg = [...]Arg{
   1558 	xArg1:    Imm(1),
   1559 	xArg3:    Imm(3),
   1560 	xArgAL:   AL,
   1561 	xArgAX:   AX,
   1562 	xArgDX:   DX,
   1563 	xArgEAX:  EAX,
   1564 	xArgEDX:  EDX,
   1565 	xArgRAX:  RAX,
   1566 	xArgRDX:  RDX,
   1567 	xArgCL:   CL,
   1568 	xArgCS:   CS,
   1569 	xArgDS:   DS,
   1570 	xArgES:   ES,
   1571 	xArgFS:   FS,
   1572 	xArgGS:   GS,
   1573 	xArgSS:   SS,
   1574 	xArgST:   F0,
   1575 	xArgXMM0: X0,
   1576 }
   1577 
   1578 // memBytes records the size, in bytes, of the memory pointed at by a memory argument of the given form; most sizes are written as bits / 8.
   1579 // NOTE(review): several memory forms the decoder handles (e.g. xArgM8, xArgM80fp, xArgM512byte) have no entry here, so no size is recorded for them — confirm this is intended.
   1580 var memBytes = [...]int8{
   1581 	xArgM128:       128 / 8,
   1582 	xArgM16:        16 / 8,
   1583 	xArgM16and16:   (16 + 16) / 8,
   1584 	xArgM16colon16: (16 + 16) / 8,
   1585 	xArgM16colon32: (16 + 32) / 8,
   1586 	xArgM16int:     16 / 8,
   1587 	xArgM2byte:     2,
   1588 	xArgM32:        32 / 8,
   1589 	xArgM32and32:   (32 + 32) / 8,
   1590 	xArgM32fp:      32 / 8,
   1591 	xArgM32int:     32 / 8,
   1592 	xArgM64:        64 / 8,
   1593 	xArgM64fp:      64 / 8,
   1594 	xArgM64int:     64 / 8,
   1595 	xArgMm2M64:     64 / 8,
   1596 	xArgMmM32:      32 / 8,
   1597 	xArgMmM64:      64 / 8,
   1598 	xArgMoffs16:    16 / 8,
   1599 	xArgMoffs32:    32 / 8,
   1600 	xArgMoffs64:    64 / 8,
   1601 	xArgMoffs8:     8 / 8,
   1602 	xArgR32M16:     16 / 8,
   1603 	xArgR32M8:      8 / 8,
   1604 	xArgR64M16:     16 / 8,
   1605 	xArgRM16:       16 / 8,
   1606 	xArgRM32:       32 / 8,
   1607 	xArgRM64:       64 / 8,
   1608 	xArgRM8:        8 / 8,
   1609 	xArgXmm2M128:   128 / 8,
   1610 	xArgXmm2M16:    16 / 8,
   1611 	xArgXmm2M32:    32 / 8,
   1612 	xArgXmm2M64:    64 / 8,
   1613 	xArgXmm:        128 / 8,
   1614 	xArgXmmM128:    128 / 8,
   1615 	xArgXmmM32:     32 / 8,
   1616 	xArgXmmM64:     64 / 8,
   1617 }
   1618 
   1619 // isCondJmp records, indexed by Op, which instructions are the Jcc conditional jumps; JCXZ, JECXZ, and JRCXZ are not included.
   1620 var isCondJmp = [maxOp + 1]bool{
   1621 	JA:  true,
   1622 	JAE: true,
   1623 	JB:  true,
   1624 	JBE: true,
   1625 	JE:  true,
   1626 	JG:  true,
   1627 	JGE: true,
   1628 	JL:  true,
   1629 	JLE: true,
   1630 	JNE: true,
   1631 	JNO: true,
   1632 	JNP: true,
   1633 	JNS: true,
   1634 	JO:  true,
   1635 	JP:  true,
   1636 	JS:  true,
   1637 }
   1638 
   1639 // isLoop records, indexed by Op, the loop operators LOOP, LOOPE, and LOOPNE plus JECXZ and JRCXZ; note that JCXZ has no entry in this table.
   1640 var isLoop = [maxOp + 1]bool{
   1641 	LOOP:   true,
   1642 	LOOPE:  true,
   1643 	LOOPNE: true,
   1644 	JECXZ:  true,
   1645 	JRCXZ:  true,
   1646 }
   1647