1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Table-driven decoding of x86 instructions. 6 7 package x86asm 8 9 import ( 10 "encoding/binary" 11 "errors" 12 "fmt" 13 "runtime" 14 ) 15 16 // Set trace to true to cause the decoder to print the PC sequence 17 // of the executed instruction codes. This is typically only useful 18 // when you are running a test of a single input case. 19 const trace = false 20 21 // A decodeOp is a single instruction in the decoder bytecode program. 22 // 23 // The decodeOps correspond to consuming and conditionally branching 24 // on input bytes, consuming additional fields, and then interpreting 25 // consumed data as instruction arguments. The names of the xRead and xArg 26 // operations are taken from the Intel manual conventions, for example 27 // Volume 2, Section 3.1.1, page 487 of 28 // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf 29 // 30 // The actual decoding program is generated by ../x86map. 31 // 32 // TODO(rsc): We may be able to merge various of the memory operands 33 // since we don't care about, say, the distinction between m80dec and m80bcd. 34 // Similarly, mm and mm1 have identical meaning, as do xmm and xmm1. 
type decodeOp uint16

// The decodeOp values are assigned sequentially by iota, so the
// declaration order below determines each op's numeric value.
// decode1 uses these values both to dispatch and to index the
// fixedArg, baseReg, and memBytes tables.
// NOTE(review): the generated decoder table presumably encodes these
// numeric values as well, so entries should not be reordered or
// inserted mid-list without regenerating it — confirm against ../x86map.
const (
	// Control flow.
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	// Conditional branches: each selects the next pc from the decoder table.
	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	// Output: record the instruction's Op.
	xSetOp // set instruction opcode

	// Input: consume further bytes of the instruction.
	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	// Output: append an argument to the decoded instruction.
	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)

// instPrefix returns an Inst describing just one prefix byte.
// It is only used if there is a prefix followed by an unintelligible
// or invalid instruction byte sequence.
175 func instPrefix(b byte, mode int) (Inst, error) { 176 // When tracing it is useful to see what called instPrefix to report an error. 177 if trace { 178 _, file, line, _ := runtime.Caller(1) 179 fmt.Printf("%s:%d\n", file, line) 180 } 181 p := Prefix(b) 182 switch p { 183 case PrefixDataSize: 184 if mode == 16 { 185 p = PrefixData32 186 } else { 187 p = PrefixData16 188 } 189 case PrefixAddrSize: 190 if mode == 32 { 191 p = PrefixAddr16 192 } else { 193 p = PrefixAddr32 194 } 195 } 196 // Note: using composite literal with Prefix key confuses 'bundle' tool. 197 inst := Inst{Len: 1} 198 inst.Prefix = Prefixes{p} 199 return inst, nil 200 } 201 202 // truncated reports a truncated instruction. 203 // For now we use instPrefix but perhaps later we will return 204 // a specific error here. 205 func truncated(src []byte, mode int) (Inst, error) { 206 // return Inst{}, len(src), ErrTruncated 207 return instPrefix(src[0], mode) // too long 208 } 209 210 // These are the errors returned by Decode. 211 var ( 212 ErrInvalidMode = errors.New("invalid x86 mode in Decode") 213 ErrTruncated = errors.New("truncated instruction") 214 ErrUnrecognized = errors.New("unrecognized instruction") 215 ) 216 217 // decoderCover records coverage information for which parts 218 // of the byte code have been executed. 219 // TODO(rsc): This is for testing. Only use this if a flag is given. 220 var decoderCover []bool 221 222 // Decode decodes the leading bytes in src as a single instruction. 223 // The mode arguments specifies the assumed processor mode: 224 // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes. 225 func Decode(src []byte, mode int) (inst Inst, err error) { 226 return decode1(src, mode, false) 227 } 228 229 // decode1 is the implementation of Decode but takes an extra 230 // gnuCompat flag to cause it to change its behavior to mimic 231 // bugs (or at least unique features) of GNU libopcodes as used 232 // by objdump. 
We don't believe that logic is the right thing to do 233 // in general, but when testing against libopcodes it simplifies the 234 // comparison if we adjust a few small pieces of logic. 235 // The affected logic is in the conditional branch for "mandatory" prefixes, 236 // case xCondPrefix. 237 func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) { 238 switch mode { 239 case 16, 32, 64: 240 // ok 241 // TODO(rsc): 64-bit mode not tested, probably not working. 242 default: 243 return Inst{}, ErrInvalidMode 244 } 245 246 // Maximum instruction size is 15 bytes. 247 // If we need to read more, return 'truncated instruction. 248 if len(src) > 15 { 249 src = src[:15] 250 } 251 252 var ( 253 // prefix decoding information 254 pos = 0 // position reading src 255 nprefix = 0 // number of prefixes 256 lockIndex = -1 // index of LOCK prefix in src and inst.Prefix 257 repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix 258 segIndex = -1 // index of Group 2 prefix in src and inst.Prefix 259 dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix 260 addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix 261 rex Prefix // rex byte if present (or 0) 262 rexUsed Prefix // bits used in rex byte 263 rexIndex = -1 // index of rex byte 264 vex Prefix // use vex encoding 265 vexIndex = -1 // index of vex prefix 266 267 addrMode = mode // address mode (width in bits) 268 dataMode = mode // operand mode (width in bits) 269 270 // decoded ModR/M fields 271 haveModrm bool 272 modrm int 273 mod int 274 regop int 275 rm int 276 277 // if ModR/M is memory reference, Mem form 278 mem Mem 279 haveMem bool 280 281 // decoded SIB fields 282 haveSIB bool 283 sib int 284 scale int 285 index int 286 base int 287 displen int 288 dispoff int 289 290 // decoded immediate values 291 imm int64 292 imm8 int8 293 immc int64 294 immcpos int 295 296 // output 297 opshift int 298 inst Inst 299 narg int // number of arguments written to inst 300 ) 301 302 if 
mode == 64 { 303 dataMode = 32 304 } 305 306 // Prefixes are certainly the most complex and underspecified part of 307 // decoding x86 instructions. Although the manuals say things like 308 // up to four prefixes, one from each group, nearly everyone seems to 309 // agree that in practice as many prefixes as possible, including multiple 310 // from a particular group or repetitions of a given prefix, can be used on 311 // an instruction, provided the total instruction length including prefixes 312 // does not exceed the agreed-upon maximum of 15 bytes. 313 // Everyone also agrees that if one of these prefixes is the LOCK prefix 314 // and the instruction is not one of the instructions that can be used with 315 // the LOCK prefix or if the destination is not a memory operand, 316 // then the instruction is invalid and produces the #UD exception. 317 // However, that is the end of any semblance of agreement. 318 // 319 // What happens if prefixes are given that conflict with other prefixes? 320 // For example, the memory segment overrides CS, DS, ES, FS, GS, SS 321 // conflict with each other: only one segment can be in effect. 322 // Disassemblers seem to agree that later prefixes take priority over 323 // earlier ones. I have not taken the time to write assembly programs 324 // to check to see if the hardware agrees. 325 // 326 // What happens if prefixes are given that have no meaning for the 327 // specific instruction to which they are attached? It depends. 328 // If they really have no meaning, they are ignored. However, a future 329 // processor may assign a different meaning. As a disassembler, we 330 // don't really know whether we're seeing a meaningless prefix or one 331 // whose meaning we simply haven't been told yet. 332 // 333 // Combining the two questions, what happens when conflicting 334 // extension prefixes are given? No one seems to know for sure. 335 // For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r, 336 // and MOVQ2DQ is F3 0F D6 /r. 
What is '66 F2 F3 0F D6 /r'? 337 // Which prefix wins? See the xCondPrefix prefix for more. 338 // 339 // Writing assembly test cases to divine which interpretation the 340 // CPU uses might clarify the situation, but more likely it would 341 // make the situation even less clear. 342 343 // Read non-REX prefixes. 344 ReadPrefixes: 345 for ; pos < len(src); pos++ { 346 p := Prefix(src[pos]) 347 switch p { 348 default: 349 nprefix = pos 350 break ReadPrefixes 351 352 // Group 1 - lock and repeat prefixes 353 // According to Intel, there should only be one from this set, 354 // but according to AMD both can be present. 355 case 0xF0: 356 if lockIndex >= 0 { 357 inst.Prefix[lockIndex] |= PrefixIgnored 358 } 359 lockIndex = pos 360 case 0xF2, 0xF3: 361 if repIndex >= 0 { 362 inst.Prefix[repIndex] |= PrefixIgnored 363 } 364 repIndex = pos 365 366 // Group 2 - segment override / branch hints 367 case 0x26, 0x2E, 0x36, 0x3E: 368 if mode == 64 { 369 p |= PrefixIgnored 370 break 371 } 372 fallthrough 373 case 0x64, 0x65: 374 if segIndex >= 0 { 375 inst.Prefix[segIndex] |= PrefixIgnored 376 } 377 segIndex = pos 378 379 // Group 3 - operand size override 380 case 0x66: 381 if mode == 16 { 382 dataMode = 32 383 p = PrefixData32 384 } else { 385 dataMode = 16 386 p = PrefixData16 387 } 388 if dataSizeIndex >= 0 { 389 inst.Prefix[dataSizeIndex] |= PrefixIgnored 390 } 391 dataSizeIndex = pos 392 393 // Group 4 - address size override 394 case 0x67: 395 if mode == 32 { 396 addrMode = 16 397 p = PrefixAddr16 398 } else { 399 addrMode = 32 400 p = PrefixAddr32 401 } 402 if addrSizeIndex >= 0 { 403 inst.Prefix[addrSizeIndex] |= PrefixIgnored 404 } 405 addrSizeIndex = pos 406 407 //Group 5 - Vex encoding 408 case 0xC5: 409 if pos == 0 && (mode == 64 || (mode == 32 && pos+1 < len(src) && src[pos+1]&0xc0 == 0xc0)) { 410 vex = p 411 vexIndex = pos 412 inst.Prefix[pos] = p 413 inst.Prefix[pos+1] = Prefix(src[pos+1]) 414 pos += 1 415 continue 416 } else { 417 nprefix = pos 418 break 
ReadPrefixes 419 } 420 case 0xC4: 421 if pos == 0 && (mode == 64 || (mode == 32 && pos+2 < len(src) && src[pos+1]&0xc0 == 0xc0)) { 422 vex = p 423 vexIndex = pos 424 inst.Prefix[pos] = p 425 inst.Prefix[pos+1] = Prefix(src[pos+1]) 426 inst.Prefix[pos+2] = Prefix(src[pos+2]) 427 pos += 2 428 continue 429 } else { 430 nprefix = pos 431 break ReadPrefixes 432 } 433 } 434 435 if pos >= len(inst.Prefix) { 436 return instPrefix(src[0], mode) // too long 437 } 438 439 inst.Prefix[pos] = p 440 } 441 442 // Read REX prefix. 443 if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 { 444 rex = Prefix(src[pos]) 445 rexIndex = pos 446 if pos >= len(inst.Prefix) { 447 return instPrefix(src[0], mode) // too long 448 } 449 inst.Prefix[pos] = rex 450 pos++ 451 if rex&PrefixREXW != 0 { 452 dataMode = 64 453 if dataSizeIndex >= 0 { 454 inst.Prefix[dataSizeIndex] |= PrefixIgnored 455 } 456 } 457 } 458 459 // Decode instruction stream, interpreting decoding instructions. 460 // opshift gives the shift to use when saving the next 461 // opcode byte into inst.Opcode. 462 opshift = 24 463 if decoderCover == nil { 464 decoderCover = make([]bool, len(decoder)) 465 } 466 467 // Decode loop, executing decoder program. 468 var oldPC, prevPC int 469 Decode: 470 for pc := 1; ; { // TODO uint 471 oldPC = prevPC 472 prevPC = pc 473 if trace { 474 println("run", pc) 475 } 476 x := decoder[pc] 477 decoderCover[pc] = true 478 pc++ 479 480 // Read and decode ModR/M if needed by opcode. 
481 switch decodeOp(x) { 482 case xCondSlashR, xReadSlashR: 483 if haveModrm { 484 return Inst{Len: pos}, errInternal 485 } 486 haveModrm = true 487 if pos >= len(src) { 488 return truncated(src, mode) 489 } 490 modrm = int(src[pos]) 491 pos++ 492 if opshift >= 0 { 493 inst.Opcode |= uint32(modrm) << uint(opshift) 494 opshift -= 8 495 } 496 mod = modrm >> 6 497 regop = (modrm >> 3) & 07 498 rm = modrm & 07 499 if rex&PrefixREXR != 0 { 500 rexUsed |= PrefixREXR 501 regop |= 8 502 } 503 if addrMode == 16 { 504 // 16-bit modrm form 505 if mod != 3 { 506 haveMem = true 507 mem = addr16[rm] 508 if rm == 6 && mod == 0 { 509 mem.Base = 0 510 } 511 512 // Consume disp16 if present. 513 if mod == 0 && rm == 6 || mod == 2 { 514 if pos+2 > len(src) { 515 return truncated(src, mode) 516 } 517 mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:])) 518 pos += 2 519 } 520 521 // Consume disp8 if present. 522 if mod == 1 { 523 if pos >= len(src) { 524 return truncated(src, mode) 525 } 526 mem.Disp = int64(int8(src[pos])) 527 pos++ 528 } 529 } 530 } else { 531 haveMem = mod != 3 532 533 // 32-bit or 64-bit form 534 // Consume SIB encoding if present. 
535 if rm == 4 && mod != 3 { 536 haveSIB = true 537 if pos >= len(src) { 538 return truncated(src, mode) 539 } 540 sib = int(src[pos]) 541 pos++ 542 if opshift >= 0 { 543 inst.Opcode |= uint32(sib) << uint(opshift) 544 opshift -= 8 545 } 546 scale = sib >> 6 547 index = (sib >> 3) & 07 548 base = sib & 07 549 if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 { 550 rexUsed |= PrefixREXB 551 base |= 8 552 } 553 if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 { 554 rexUsed |= PrefixREXX 555 index |= 8 556 } 557 558 mem.Scale = 1 << uint(scale) 559 if index == 4 { 560 // no mem.Index 561 } else { 562 mem.Index = baseRegForBits(addrMode) + Reg(index) 563 } 564 if base&7 == 5 && mod == 0 { 565 // no mem.Base 566 } else { 567 mem.Base = baseRegForBits(addrMode) + Reg(base) 568 } 569 } else { 570 if rex&PrefixREXB != 0 { 571 rexUsed |= PrefixREXB 572 rm |= 8 573 } 574 if mod == 0 && rm&7 == 5 || rm&7 == 4 { 575 // base omitted 576 } else if mod != 3 { 577 mem.Base = baseRegForBits(addrMode) + Reg(rm) 578 } 579 } 580 581 // Consume disp32 if present. 582 if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 { 583 if pos+4 > len(src) { 584 return truncated(src, mode) 585 } 586 dispoff = pos 587 displen = 4 588 mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:])) 589 pos += 4 590 } 591 592 // Consume disp8 if present. 593 if mod == 1 { 594 if pos >= len(src) { 595 return truncated(src, mode) 596 } 597 dispoff = pos 598 displen = 1 599 mem.Disp = int64(int8(src[pos])) 600 pos++ 601 } 602 603 // In 64-bit, mod=0 rm=5 is PC-relative instead of just disp. 604 // See Vol 2A. Table 2-7. 605 if mode == 64 && mod == 0 && rm&7 == 5 { 606 if addrMode == 32 { 607 mem.Base = EIP 608 } else { 609 mem.Base = RIP 610 } 611 } 612 } 613 614 if segIndex >= 0 { 615 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 616 } 617 } 618 619 // Execute single opcode. 
620 switch decodeOp(x) { 621 default: 622 println("bad op", x, "at", pc-1, "from", oldPC) 623 return Inst{Len: pos}, errInternal 624 625 case xFail: 626 inst.Op = 0 627 break Decode 628 629 case xMatch: 630 break Decode 631 632 case xJump: 633 pc = int(decoder[pc]) 634 635 // Conditional branches. 636 637 case xCondByte: 638 if pos >= len(src) { 639 return truncated(src, mode) 640 } 641 b := src[pos] 642 n := int(decoder[pc]) 643 pc++ 644 for i := 0; i < n; i++ { 645 xb, xpc := decoder[pc], int(decoder[pc+1]) 646 pc += 2 647 if b == byte(xb) { 648 pc = xpc 649 pos++ 650 if opshift >= 0 { 651 inst.Opcode |= uint32(b) << uint(opshift) 652 opshift -= 8 653 } 654 continue Decode 655 } 656 } 657 // xCondByte is the only conditional with a fall through, 658 // so that it can be used to pick off special cases before 659 // an xCondSlash. If the fallthrough instruction is xFail, 660 // advance the position so that the decoded instruction 661 // size includes the byte we just compared against. 662 if decodeOp(decoder[pc]) == xJump { 663 pc = int(decoder[pc+1]) 664 } 665 if decodeOp(decoder[pc]) == xFail { 666 pos++ 667 } 668 669 case xCondIs64: 670 if mode == 64 { 671 pc = int(decoder[pc+1]) 672 } else { 673 pc = int(decoder[pc]) 674 } 675 676 case xCondIsMem: 677 mem := haveMem 678 if !haveModrm { 679 if pos >= len(src) { 680 return instPrefix(src[0], mode) // too long 681 } 682 mem = src[pos]>>6 != 3 683 } 684 if mem { 685 pc = int(decoder[pc+1]) 686 } else { 687 pc = int(decoder[pc]) 688 } 689 690 case xCondDataSize: 691 switch dataMode { 692 case 16: 693 if dataSizeIndex >= 0 { 694 inst.Prefix[dataSizeIndex] |= PrefixImplicit 695 } 696 pc = int(decoder[pc]) 697 case 32: 698 if dataSizeIndex >= 0 { 699 inst.Prefix[dataSizeIndex] |= PrefixImplicit 700 } 701 pc = int(decoder[pc+1]) 702 case 64: 703 rexUsed |= PrefixREXW 704 pc = int(decoder[pc+2]) 705 } 706 707 case xCondAddrSize: 708 switch addrMode { 709 case 16: 710 if addrSizeIndex >= 0 { 711 inst.Prefix[addrSizeIndex] 
|= PrefixImplicit 712 } 713 pc = int(decoder[pc]) 714 case 32: 715 if addrSizeIndex >= 0 { 716 inst.Prefix[addrSizeIndex] |= PrefixImplicit 717 } 718 pc = int(decoder[pc+1]) 719 case 64: 720 pc = int(decoder[pc+2]) 721 } 722 723 case xCondPrefix: 724 // Conditional branch based on presence or absence of prefixes. 725 // The conflict cases here are completely undocumented and 726 // differ significantly between GNU libopcodes and Intel xed. 727 // I have not written assembly code to divine what various CPUs 728 // do, but it wouldn't surprise me if they are not consistent either. 729 // 730 // The basic idea is to switch on the presence of a prefix, so that 731 // for example: 732 // 733 // xCondPrefix, 4 734 // 0xF3, 123, 735 // 0xF2, 234, 736 // 0x66, 345, 737 // 0, 456 738 // 739 // branch to 123 if the F3 prefix is present, 234 if the F2 prefix 740 // is present, 66 if the 345 prefix is present, and 456 otherwise. 741 // The prefixes are given in descending order so that the 0 will be last. 742 // 743 // It is unclear what should happen if multiple conditions are 744 // satisfied: what if F2 and F3 are both present, or if 66 and F2 745 // are present, or if all three are present? The one chosen becomes 746 // part of the opcode and the others do not. Perhaps the answer 747 // depends on the specific opcodes in question. 748 // 749 // The only clear example is that CRC32 is F2 0F 38 F1 /r, and 750 // it comes in 16-bit and 32-bit forms based on the 66 prefix, 751 // so 66 F2 0F 38 F1 /r should be treated as F2 taking priority, 752 // with the 66 being only an operand size override, and probably 753 // F2 66 0F 38 F1 /r should be treated the same. 754 // Perhaps that rule is specific to the case of CRC32, since no 755 // 66 0F 38 F1 instruction is defined (today) (that we know of). 756 // However, both libopcodes and xed seem to generalize this 757 // example and choose F2/F3 in preference to 66, and we 758 // do the same. 
759 // 760 // Next, what if both F2 and F3 are present? Which wins? 761 // The Intel xed rule, and ours, is that the one that occurs last wins. 762 // The GNU libopcodes rule, which we implement only in gnuCompat mode, 763 // is that F3 beats F2 unless F3 has no special meaning, in which 764 // case F3 can be a modified on an F2 special meaning. 765 // 766 // Concretely, 767 // 66 0F D6 /r is MOVQ 768 // F2 0F D6 /r is MOVDQ2Q 769 // F3 0F D6 /r is MOVQ2DQ. 770 // 771 // F2 66 0F D6 /r is 66 + MOVDQ2Q always. 772 // 66 F2 0F D6 /r is 66 + MOVDQ2Q always. 773 // F3 66 0F D6 /r is 66 + MOVQ2DQ always. 774 // 66 F3 0F D6 /r is 66 + MOVQ2DQ always. 775 // F2 F3 0F D6 /r is F2 + MOVQ2DQ always. 776 // F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes. 777 // Adding 66 anywhere in the prefix section of the 778 // last two cases does not change the outcome. 779 // 780 // Finally, what if there is a variant in which 66 is a mandatory 781 // prefix rather than an operand size override, but we know of 782 // no corresponding F2/F3 form, and we see both F2/F3 and 66. 783 // Does F2/F3 still take priority, so that the result is an unknown 784 // instruction, or does the 66 take priority, so that the extended 785 // 66 instruction should be interpreted as having a REP/REPN prefix? 786 // Intel xed does the former and GNU libopcodes does the latter. 787 // We side with Intel xed, unless we are trying to match libopcodes 788 // more closely during the comparison-based test suite. 789 // 790 // In 64-bit mode REX.W is another valid prefix to test for, but 791 // there is less ambiguity about that. When present, REX.W is 792 // always the first entry in the table. 
793 n := int(decoder[pc]) 794 pc++ 795 sawF3 := false 796 for j := 0; j < n; j++ { 797 prefix := Prefix(decoder[pc+2*j]) 798 if prefix.IsREX() { 799 rexUsed |= prefix 800 if rex&prefix == prefix { 801 pc = int(decoder[pc+2*j+1]) 802 continue Decode 803 } 804 continue 805 } 806 ok := false 807 if prefix == 0 { 808 ok = true 809 } else if prefix.IsREX() { 810 rexUsed |= prefix 811 if rex&prefix == prefix { 812 ok = true 813 } 814 } else if prefix == 0xC5 || prefix == 0xC4 { 815 if vex == prefix { 816 ok = true 817 } 818 } else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A || 819 prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) { 820 var vexM, vexP Prefix 821 if vex == 0xC5 { 822 vexM = 1 // 2 byte vex always implies 0F 823 vexP = inst.Prefix[vexIndex+1] 824 } else { 825 vexM = inst.Prefix[vexIndex+1] 826 vexP = inst.Prefix[vexIndex+2] 827 } 828 switch prefix { 829 case 0x66: 830 ok = vexP&3 == 1 831 case 0xF3: 832 ok = vexP&3 == 2 833 case 0xF2: 834 ok = vexP&3 == 3 835 case 0x0F: 836 ok = vexM&3 == 1 837 case 0x0F38: 838 ok = vexM&3 == 2 839 case 0x0F3A: 840 ok = vexM&3 == 3 841 } 842 } else { 843 if prefix == 0xF3 { 844 sawF3 = true 845 } 846 switch prefix { 847 case PrefixLOCK: 848 if lockIndex >= 0 { 849 inst.Prefix[lockIndex] |= PrefixImplicit 850 ok = true 851 } 852 case PrefixREP, PrefixREPN: 853 if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix { 854 inst.Prefix[repIndex] |= PrefixImplicit 855 ok = true 856 } 857 if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) { 858 // Check to see if earlier prefix F3 is present. 859 for i := repIndex - 1; i >= 0; i-- { 860 if inst.Prefix[i]&0xFF == prefix { 861 inst.Prefix[i] |= PrefixImplicit 862 ok = true 863 } 864 } 865 } 866 if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 { 867 // Check to see if earlier prefix F2 is present. 
868 for i := repIndex - 1; i >= 0; i-- { 869 if inst.Prefix[i]&0xFF == prefix { 870 inst.Prefix[i] |= PrefixImplicit 871 ok = true 872 } 873 } 874 } 875 case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS: 876 if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix { 877 inst.Prefix[segIndex] |= PrefixImplicit 878 ok = true 879 } 880 case PrefixDataSize: 881 // Looking for 66 mandatory prefix. 882 // The F2/F3 mandatory prefixes take priority when both are present. 883 // If we got this far in the xCondPrefix table and an F2/F3 is present, 884 // it means the table didn't have any entry for that prefix. But if 66 has 885 // special meaning, perhaps F2/F3 have special meaning that we don't know. 886 // Intel xed works this way, treating the F2/F3 as inhibiting the 66. 887 // GNU libopcodes allows the 66 to match. We do what Intel xed does 888 // except in gnuCompat mode. 889 if repIndex >= 0 && !gnuCompat { 890 inst.Op = 0 891 break Decode 892 } 893 if dataSizeIndex >= 0 { 894 inst.Prefix[dataSizeIndex] |= PrefixImplicit 895 ok = true 896 } 897 case PrefixAddrSize: 898 if addrSizeIndex >= 0 { 899 inst.Prefix[addrSizeIndex] |= PrefixImplicit 900 ok = true 901 } 902 } 903 } 904 if ok { 905 pc = int(decoder[pc+2*j+1]) 906 continue Decode 907 } 908 } 909 inst.Op = 0 910 break Decode 911 912 case xCondSlashR: 913 pc = int(decoder[pc+regop&7]) 914 915 // Input. 
916 917 case xReadSlashR: 918 // done above 919 920 case xReadIb: 921 if pos >= len(src) { 922 return truncated(src, mode) 923 } 924 imm8 = int8(src[pos]) 925 pos++ 926 927 case xReadIw: 928 if pos+2 > len(src) { 929 return truncated(src, mode) 930 } 931 imm = int64(binary.LittleEndian.Uint16(src[pos:])) 932 pos += 2 933 934 case xReadId: 935 if pos+4 > len(src) { 936 return truncated(src, mode) 937 } 938 imm = int64(binary.LittleEndian.Uint32(src[pos:])) 939 pos += 4 940 941 case xReadIo: 942 if pos+8 > len(src) { 943 return truncated(src, mode) 944 } 945 imm = int64(binary.LittleEndian.Uint64(src[pos:])) 946 pos += 8 947 948 case xReadCb: 949 if pos >= len(src) { 950 return truncated(src, mode) 951 } 952 immcpos = pos 953 immc = int64(src[pos]) 954 pos++ 955 956 case xReadCw: 957 if pos+2 > len(src) { 958 return truncated(src, mode) 959 } 960 immcpos = pos 961 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 962 pos += 2 963 964 case xReadCm: 965 immcpos = pos 966 if addrMode == 16 { 967 if pos+2 > len(src) { 968 return truncated(src, mode) 969 } 970 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 971 pos += 2 972 } else if addrMode == 32 { 973 if pos+4 > len(src) { 974 return truncated(src, mode) 975 } 976 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 977 pos += 4 978 } else { 979 if pos+8 > len(src) { 980 return truncated(src, mode) 981 } 982 immc = int64(binary.LittleEndian.Uint64(src[pos:])) 983 pos += 8 984 } 985 case xReadCd: 986 immcpos = pos 987 if pos+4 > len(src) { 988 return truncated(src, mode) 989 } 990 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 991 pos += 4 992 993 case xReadCp: 994 immcpos = pos 995 if pos+6 > len(src) { 996 return truncated(src, mode) 997 } 998 w := binary.LittleEndian.Uint32(src[pos:]) 999 w2 := binary.LittleEndian.Uint16(src[pos+4:]) 1000 immc = int64(w2)<<32 | int64(w) 1001 pos += 6 1002 1003 // Output. 
1004 1005 case xSetOp: 1006 inst.Op = Op(decoder[pc]) 1007 pc++ 1008 1009 case xArg1, 1010 xArg3, 1011 xArgAL, 1012 xArgAX, 1013 xArgCL, 1014 xArgCS, 1015 xArgDS, 1016 xArgDX, 1017 xArgEAX, 1018 xArgEDX, 1019 xArgES, 1020 xArgFS, 1021 xArgGS, 1022 xArgRAX, 1023 xArgRDX, 1024 xArgSS, 1025 xArgST, 1026 xArgXMM0: 1027 inst.Args[narg] = fixedArg[x] 1028 narg++ 1029 1030 case xArgImm8: 1031 inst.Args[narg] = Imm(imm8) 1032 narg++ 1033 1034 case xArgImm8u: 1035 inst.Args[narg] = Imm(uint8(imm8)) 1036 narg++ 1037 1038 case xArgImm16: 1039 inst.Args[narg] = Imm(int16(imm)) 1040 narg++ 1041 1042 case xArgImm16u: 1043 inst.Args[narg] = Imm(uint16(imm)) 1044 narg++ 1045 1046 case xArgImm32: 1047 inst.Args[narg] = Imm(int32(imm)) 1048 narg++ 1049 1050 case xArgImm64: 1051 inst.Args[narg] = Imm(imm) 1052 narg++ 1053 1054 case xArgM, 1055 xArgM128, 1056 xArgM256, 1057 xArgM1428byte, 1058 xArgM16, 1059 xArgM16and16, 1060 xArgM16and32, 1061 xArgM16and64, 1062 xArgM16colon16, 1063 xArgM16colon32, 1064 xArgM16colon64, 1065 xArgM16int, 1066 xArgM2byte, 1067 xArgM32, 1068 xArgM32and32, 1069 xArgM32fp, 1070 xArgM32int, 1071 xArgM512byte, 1072 xArgM64, 1073 xArgM64fp, 1074 xArgM64int, 1075 xArgM8, 1076 xArgM80bcd, 1077 xArgM80dec, 1078 xArgM80fp, 1079 xArgM94108byte, 1080 xArgMem: 1081 if !haveMem { 1082 inst.Op = 0 1083 break Decode 1084 } 1085 inst.Args[narg] = mem 1086 inst.MemBytes = int(memBytes[decodeOp(x)]) 1087 if mem.Base == RIP { 1088 inst.PCRel = displen 1089 inst.PCRelOff = dispoff 1090 } 1091 narg++ 1092 1093 case xArgPtr16colon16: 1094 inst.Args[narg] = Imm(immc >> 16) 1095 inst.Args[narg+1] = Imm(immc & (1<<16 - 1)) 1096 narg += 2 1097 1098 case xArgPtr16colon32: 1099 inst.Args[narg] = Imm(immc >> 32) 1100 inst.Args[narg+1] = Imm(immc & (1<<32 - 1)) 1101 narg += 2 1102 1103 case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64: 1104 // TODO(rsc): Can address be 64 bits? 
1105 mem = Mem{Disp: int64(immc)} 1106 if segIndex >= 0 { 1107 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 1108 inst.Prefix[segIndex] |= PrefixImplicit 1109 } 1110 inst.Args[narg] = mem 1111 inst.MemBytes = int(memBytes[decodeOp(x)]) 1112 if mem.Base == RIP { 1113 inst.PCRel = displen 1114 inst.PCRelOff = dispoff 1115 } 1116 narg++ 1117 1118 case xArgYmm1: 1119 base := baseReg[x] 1120 index := Reg(regop) 1121 if inst.Prefix[vexIndex+1]&0x80 == 0 { 1122 index += 8 1123 } 1124 inst.Args[narg] = base + index 1125 narg++ 1126 1127 case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7: 1128 base := baseReg[x] 1129 index := Reg(regop) 1130 if rex != 0 && base == AL && index >= 4 { 1131 rexUsed |= PrefixREX 1132 index -= 4 1133 base = SPB 1134 } 1135 inst.Args[narg] = base + index 1136 narg++ 1137 1138 case xArgMm, xArgMm1, xArgTR0dashTR7: 1139 inst.Args[narg] = baseReg[x] + Reg(regop&7) 1140 narg++ 1141 1142 case xArgCR0dashCR7: 1143 // AMD documents an extension that the LOCK prefix 1144 // can be used in place of a REX prefix in order to access 1145 // CR8 from 32-bit mode. The LOCK prefix is allowed in 1146 // all modes, provided the corresponding CPUID bit is set. 
1147 if lockIndex >= 0 { 1148 inst.Prefix[lockIndex] |= PrefixImplicit 1149 regop += 8 1150 } 1151 inst.Args[narg] = CR0 + Reg(regop) 1152 narg++ 1153 1154 case xArgSreg: 1155 regop &= 7 1156 if regop >= 6 { 1157 inst.Op = 0 1158 break Decode 1159 } 1160 inst.Args[narg] = ES + Reg(regop) 1161 narg++ 1162 1163 case xArgRmf16, xArgRmf32, xArgRmf64: 1164 base := baseReg[x] 1165 index := Reg(modrm & 07) 1166 if rex&PrefixREXB != 0 { 1167 rexUsed |= PrefixREXB 1168 index += 8 1169 } 1170 inst.Args[narg] = base + index 1171 narg++ 1172 1173 case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi: 1174 n := inst.Opcode >> uint(opshift+8) & 07 1175 base := baseReg[x] 1176 index := Reg(n) 1177 if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi { 1178 rexUsed |= PrefixREXB 1179 index += 8 1180 } 1181 if rex != 0 && base == AL && index >= 4 { 1182 rexUsed |= PrefixREX 1183 index -= 4 1184 base = SPB 1185 } 1186 inst.Args[narg] = base + index 1187 narg++ 1188 case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16, 1189 xArgMmM32, xArgMmM64, xArgMm2M64, 1190 xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128, 1191 xArgYmm2M256: 1192 if haveMem { 1193 inst.Args[narg] = mem 1194 inst.MemBytes = int(memBytes[decodeOp(x)]) 1195 if mem.Base == RIP { 1196 inst.PCRel = displen 1197 inst.PCRelOff = dispoff 1198 } 1199 } else { 1200 base := baseReg[x] 1201 index := Reg(rm) 1202 switch decodeOp(x) { 1203 case xArgMmM32, xArgMmM64, xArgMm2M64: 1204 // There are only 8 MMX registers, so these ignore the REX.X bit. 
1205 index &= 7 1206 case xArgRM8: 1207 if rex != 0 && index >= 4 { 1208 rexUsed |= PrefixREX 1209 index -= 4 1210 base = SPB 1211 } 1212 case xArgYmm2M256: 1213 if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 { 1214 index += 8 1215 } 1216 } 1217 inst.Args[narg] = base + index 1218 } 1219 narg++ 1220 1221 case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1222 if haveMem { 1223 inst.Op = 0 1224 break Decode 1225 } 1226 inst.Args[narg] = baseReg[x] + Reg(rm&7) 1227 narg++ 1228 1229 case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1230 if haveMem { 1231 inst.Op = 0 1232 break Decode 1233 } 1234 inst.Args[narg] = baseReg[x] + Reg(rm) 1235 narg++ 1236 1237 case xArgRel8: 1238 inst.PCRelOff = immcpos 1239 inst.PCRel = 1 1240 inst.Args[narg] = Rel(int8(immc)) 1241 narg++ 1242 1243 case xArgRel16: 1244 inst.PCRelOff = immcpos 1245 inst.PCRel = 2 1246 inst.Args[narg] = Rel(int16(immc)) 1247 narg++ 1248 1249 case xArgRel32: 1250 inst.PCRelOff = immcpos 1251 inst.PCRel = 4 1252 inst.Args[narg] = Rel(int32(immc)) 1253 narg++ 1254 } 1255 } 1256 1257 if inst.Op == 0 { 1258 // Invalid instruction. 1259 if nprefix > 0 { 1260 return instPrefix(src[0], mode) // invalid instruction 1261 } 1262 return Inst{Len: pos}, ErrUnrecognized 1263 } 1264 1265 // Matched! Hooray! 1266 1267 // 90 decodes as XCHG EAX, EAX but is NOP. 1268 // 66 90 decodes as XCHG AX, AX and is NOP too. 1269 // 48 90 decodes as XCHG RAX, RAX and is NOP too. 1270 // 43 90 decodes as XCHG R8D, EAX and is *not* NOP. 1271 // F3 90 decodes as REP XCHG EAX, EAX but is PAUSE. 1272 // It's all too special to handle in the decoding tables, at least for now. 
1273 if inst.Op == XCHG && inst.Opcode>>24 == 0x90 { 1274 if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX { 1275 inst.Op = NOP 1276 if dataSizeIndex >= 0 { 1277 inst.Prefix[dataSizeIndex] &^= PrefixImplicit 1278 } 1279 inst.Args[0] = nil 1280 inst.Args[1] = nil 1281 } 1282 if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 { 1283 inst.Prefix[repIndex] |= PrefixImplicit 1284 inst.Op = PAUSE 1285 inst.Args[0] = nil 1286 inst.Args[1] = nil 1287 } else if gnuCompat { 1288 for i := nprefix - 1; i >= 0; i-- { 1289 if inst.Prefix[i]&0xFF == 0xF3 { 1290 inst.Prefix[i] |= PrefixImplicit 1291 inst.Op = PAUSE 1292 inst.Args[0] = nil 1293 inst.Args[1] = nil 1294 break 1295 } 1296 } 1297 } 1298 } 1299 1300 // defaultSeg returns the default segment for an implicit 1301 // memory reference: the final override if present, or else DS. 1302 defaultSeg := func() Reg { 1303 if segIndex >= 0 { 1304 inst.Prefix[segIndex] |= PrefixImplicit 1305 return prefixToSegment(inst.Prefix[segIndex]) 1306 } 1307 return DS 1308 } 1309 1310 // Add implicit arguments not present in the tables. 1311 // Normally we shy away from making implicit arguments explicit, 1312 // following the Intel manuals, but adding the arguments seems 1313 // the best way to express the effect of the segment override prefixes. 1314 // TODO(rsc): Perhaps add these to the tables and 1315 // create bytecode instructions for them. 
1316 usedAddrSize := false 1317 switch inst.Op { 1318 case INSB, INSW, INSD: 1319 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1320 inst.Args[1] = DX 1321 usedAddrSize = true 1322 1323 case OUTSB, OUTSW, OUTSD: 1324 inst.Args[0] = DX 1325 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1326 usedAddrSize = true 1327 1328 case MOVSB, MOVSW, MOVSD, MOVSQ: 1329 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1330 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1331 usedAddrSize = true 1332 1333 case CMPSB, CMPSW, CMPSD, CMPSQ: 1334 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1335 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1336 usedAddrSize = true 1337 1338 case LODSB, LODSW, LODSD, LODSQ: 1339 switch inst.Op { 1340 case LODSB: 1341 inst.Args[0] = AL 1342 case LODSW: 1343 inst.Args[0] = AX 1344 case LODSD: 1345 inst.Args[0] = EAX 1346 case LODSQ: 1347 inst.Args[0] = RAX 1348 } 1349 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1350 usedAddrSize = true 1351 1352 case STOSB, STOSW, STOSD, STOSQ: 1353 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1354 switch inst.Op { 1355 case STOSB: 1356 inst.Args[1] = AL 1357 case STOSW: 1358 inst.Args[1] = AX 1359 case STOSD: 1360 inst.Args[1] = EAX 1361 case STOSQ: 1362 inst.Args[1] = RAX 1363 } 1364 usedAddrSize = true 1365 1366 case SCASB, SCASW, SCASD, SCASQ: 1367 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1368 switch inst.Op { 1369 case SCASB: 1370 inst.Args[0] = AL 1371 case SCASW: 1372 inst.Args[0] = AX 1373 case SCASD: 1374 inst.Args[0] = EAX 1375 case SCASQ: 1376 inst.Args[0] = RAX 1377 } 1378 usedAddrSize = true 1379 1380 case XLATB: 1381 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX} 1382 
usedAddrSize = true 1383 } 1384 1385 // If we used the address size annotation to construct the 1386 // argument list, mark that prefix as implicit: it doesn't need 1387 // to be shown when printing the instruction. 1388 if haveMem || usedAddrSize { 1389 if addrSizeIndex >= 0 { 1390 inst.Prefix[addrSizeIndex] |= PrefixImplicit 1391 } 1392 } 1393 1394 // Similarly, if there's some memory operand, the segment 1395 // will be shown there and doesn't need to be shown as an 1396 // explicit prefix. 1397 if haveMem { 1398 if segIndex >= 0 { 1399 inst.Prefix[segIndex] |= PrefixImplicit 1400 } 1401 } 1402 1403 // Branch predict prefixes are overloaded segment prefixes, 1404 // since segment prefixes don't make sense on conditional jumps. 1405 // Rewrite final instance to prediction prefix. 1406 // The set of instructions to which the prefixes apply (other then the 1407 // Jcc conditional jumps) is not 100% clear from the manuals, but 1408 // the disassemblers seem to agree about the LOOP and JCXZ instructions, 1409 // so we'll follow along. 1410 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1411 if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ { 1412 PredictLoop: 1413 for i := nprefix - 1; i >= 0; i-- { 1414 p := inst.Prefix[i] 1415 switch p & 0xFF { 1416 case PrefixCS: 1417 inst.Prefix[i] = PrefixPN 1418 break PredictLoop 1419 case PrefixDS: 1420 inst.Prefix[i] = PrefixPT 1421 break PredictLoop 1422 } 1423 } 1424 } 1425 1426 // The BND prefix is part of the Intel Memory Protection Extensions (MPX). 1427 // A REPN applied to certain control transfers is a BND prefix to bound 1428 // the range of possible destinations. There's surprisingly little documentation 1429 // about this, so we just do what libopcodes and xed agree on. 1430 // In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions 1431 // does not turn into a BND. 
1432 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1433 if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET { 1434 for i := nprefix - 1; i >= 0; i-- { 1435 p := inst.Prefix[i] 1436 if p&^PrefixIgnored == PrefixREPN { 1437 inst.Prefix[i] = PrefixBND 1438 break 1439 } 1440 } 1441 } 1442 1443 // The LOCK prefix only applies to certain instructions, and then only 1444 // to instances of the instruction with a memory destination. 1445 // Other uses of LOCK are invalid and cause a processor exception, 1446 // in contrast to the "just ignore it" spirit applied to all other prefixes. 1447 // Mark invalid lock prefixes. 1448 hasLock := false 1449 if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 { 1450 switch inst.Op { 1451 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1452 case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG: 1453 if isMem(inst.Args[0]) { 1454 hasLock = true 1455 break 1456 } 1457 fallthrough 1458 default: 1459 inst.Prefix[lockIndex] |= PrefixInvalid 1460 } 1461 } 1462 1463 // In certain cases, all of which require a memory destination, 1464 // the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE 1465 // from the Intel Transactional Synchroniation Extensions (TSX). 1466 // 1467 // The specific rules are: 1468 // (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE. 1469 // (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE. 1470 // (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE. 
1471 if isMem(inst.Args[0]) { 1472 if inst.Op == XCHG { 1473 hasLock = true 1474 } 1475 1476 for i := len(inst.Prefix) - 1; i >= 0; i-- { 1477 p := inst.Prefix[i] &^ PrefixIgnored 1478 switch p { 1479 case PrefixREPN: 1480 if hasLock { 1481 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE 1482 } 1483 1484 case PrefixREP: 1485 if hasLock { 1486 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE 1487 } 1488 1489 if inst.Op == MOV { 1490 op := (inst.Opcode >> 24) &^ 1 1491 if op == 0x88 || op == 0xC6 { 1492 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE 1493 } 1494 } 1495 } 1496 } 1497 } 1498 1499 // If REP is used on a non-REP-able instruction, mark the prefix as ignored. 1500 if repIndex >= 0 { 1501 switch inst.Prefix[repIndex] { 1502 case PrefixREP, PrefixREPN: 1503 switch inst.Op { 1504 // According to the manuals, the REP/REPE prefix applies to all of these, 1505 // while the REPN applies only to some of them. However, both libopcodes 1506 // and xed show both prefixes explicitly for all instructions, so we do the same. 1507 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1508 case INSB, INSW, INSD, 1509 MOVSB, MOVSW, MOVSD, MOVSQ, 1510 OUTSB, OUTSW, OUTSD, 1511 LODSB, LODSW, LODSD, LODSQ, 1512 CMPSB, CMPSW, CMPSD, CMPSQ, 1513 SCASB, SCASW, SCASD, SCASQ, 1514 STOSB, STOSW, STOSD, STOSQ: 1515 // ok 1516 default: 1517 inst.Prefix[repIndex] |= PrefixIgnored 1518 } 1519 } 1520 } 1521 1522 // If REX was present, mark implicit if all the 1 bits were consumed. 1523 if rexIndex >= 0 { 1524 if rexUsed != 0 { 1525 rexUsed |= PrefixREX 1526 } 1527 if rex&^rexUsed == 0 { 1528 inst.Prefix[rexIndex] |= PrefixImplicit 1529 } 1530 } 1531 1532 inst.DataSize = dataMode 1533 inst.AddrSize = addrMode 1534 inst.Mode = mode 1535 inst.Len = pos 1536 return inst, nil 1537 } 1538 1539 var errInternal = errors.New("internal error") 1540 1541 // addr16 records the eight 16-bit addressing modes. 
1542 var addr16 = [8]Mem{ 1543 {Base: BX, Scale: 1, Index: SI}, 1544 {Base: BX, Scale: 1, Index: DI}, 1545 {Base: BP, Scale: 1, Index: SI}, 1546 {Base: BP, Scale: 1, Index: DI}, 1547 {Base: SI}, 1548 {Base: DI}, 1549 {Base: BP}, 1550 {Base: BX}, 1551 } 1552 1553 // baseReg returns the base register for a given register size in bits. 1554 func baseRegForBits(bits int) Reg { 1555 switch bits { 1556 case 8: 1557 return AL 1558 case 16: 1559 return AX 1560 case 32: 1561 return EAX 1562 case 64: 1563 return RAX 1564 } 1565 return 0 1566 } 1567 1568 // baseReg records the base register for argument types that specify 1569 // a range of registers indexed by op, regop, or rm. 1570 var baseReg = [...]Reg{ 1571 xArgDR0dashDR7: DR0, 1572 xArgMm1: M0, 1573 xArgMm2: M0, 1574 xArgMm2M64: M0, 1575 xArgMm: M0, 1576 xArgMmM32: M0, 1577 xArgMmM64: M0, 1578 xArgR16: AX, 1579 xArgR16op: AX, 1580 xArgR32: EAX, 1581 xArgR32M16: EAX, 1582 xArgR32M8: EAX, 1583 xArgR32op: EAX, 1584 xArgR64: RAX, 1585 xArgR64M16: RAX, 1586 xArgR64op: RAX, 1587 xArgR8: AL, 1588 xArgR8op: AL, 1589 xArgRM16: AX, 1590 xArgRM32: EAX, 1591 xArgRM64: RAX, 1592 xArgRM8: AL, 1593 xArgRmf16: AX, 1594 xArgRmf32: EAX, 1595 xArgRmf64: RAX, 1596 xArgSTi: F0, 1597 xArgTR0dashTR7: TR0, 1598 xArgXmm1: X0, 1599 xArgYmm1: X0, 1600 xArgXmm2: X0, 1601 xArgXmm2M128: X0, 1602 xArgYmm2M256: X0, 1603 xArgXmm2M16: X0, 1604 xArgXmm2M32: X0, 1605 xArgXmm2M64: X0, 1606 xArgXmm: X0, 1607 xArgXmmM128: X0, 1608 xArgXmmM32: X0, 1609 xArgXmmM64: X0, 1610 } 1611 1612 // prefixToSegment returns the segment register 1613 // corresponding to a particular segment prefix. 
1614 func prefixToSegment(p Prefix) Reg { 1615 switch p &^ PrefixImplicit { 1616 case PrefixCS: 1617 return CS 1618 case PrefixDS: 1619 return DS 1620 case PrefixES: 1621 return ES 1622 case PrefixFS: 1623 return FS 1624 case PrefixGS: 1625 return GS 1626 case PrefixSS: 1627 return SS 1628 } 1629 return 0 1630 } 1631 1632 // fixedArg records the fixed arguments corresponding to the given bytecodes. 1633 var fixedArg = [...]Arg{ 1634 xArg1: Imm(1), 1635 xArg3: Imm(3), 1636 xArgAL: AL, 1637 xArgAX: AX, 1638 xArgDX: DX, 1639 xArgEAX: EAX, 1640 xArgEDX: EDX, 1641 xArgRAX: RAX, 1642 xArgRDX: RDX, 1643 xArgCL: CL, 1644 xArgCS: CS, 1645 xArgDS: DS, 1646 xArgES: ES, 1647 xArgFS: FS, 1648 xArgGS: GS, 1649 xArgSS: SS, 1650 xArgST: F0, 1651 xArgXMM0: X0, 1652 } 1653 1654 // memBytes records the size of the memory pointed at 1655 // by a memory argument of the given form. 1656 var memBytes = [...]int8{ 1657 xArgM128: 128 / 8, 1658 xArgM256: 256 / 8, 1659 xArgM16: 16 / 8, 1660 xArgM16and16: (16 + 16) / 8, 1661 xArgM16colon16: (16 + 16) / 8, 1662 xArgM16colon32: (16 + 32) / 8, 1663 xArgM16int: 16 / 8, 1664 xArgM2byte: 2, 1665 xArgM32: 32 / 8, 1666 xArgM32and32: (32 + 32) / 8, 1667 xArgM32fp: 32 / 8, 1668 xArgM32int: 32 / 8, 1669 xArgM64: 64 / 8, 1670 xArgM64fp: 64 / 8, 1671 xArgM64int: 64 / 8, 1672 xArgMm2M64: 64 / 8, 1673 xArgMmM32: 32 / 8, 1674 xArgMmM64: 64 / 8, 1675 xArgMoffs16: 16 / 8, 1676 xArgMoffs32: 32 / 8, 1677 xArgMoffs64: 64 / 8, 1678 xArgMoffs8: 8 / 8, 1679 xArgR32M16: 16 / 8, 1680 xArgR32M8: 8 / 8, 1681 xArgR64M16: 16 / 8, 1682 xArgRM16: 16 / 8, 1683 xArgRM32: 32 / 8, 1684 xArgRM64: 64 / 8, 1685 xArgRM8: 8 / 8, 1686 xArgXmm2M128: 128 / 8, 1687 xArgYmm2M256: 256 / 8, 1688 xArgXmm2M16: 16 / 8, 1689 xArgXmm2M32: 32 / 8, 1690 xArgXmm2M64: 64 / 8, 1691 xArgXmm: 128 / 8, 1692 xArgXmmM128: 128 / 8, 1693 xArgXmmM32: 32 / 8, 1694 xArgXmmM64: 64 / 8, 1695 } 1696 1697 // isCondJmp records the conditional jumps. 
1698 var isCondJmp = [maxOp + 1]bool{ 1699 JA: true, 1700 JAE: true, 1701 JB: true, 1702 JBE: true, 1703 JE: true, 1704 JG: true, 1705 JGE: true, 1706 JL: true, 1707 JLE: true, 1708 JNE: true, 1709 JNO: true, 1710 JNP: true, 1711 JNS: true, 1712 JO: true, 1713 JP: true, 1714 JS: true, 1715 } 1716 1717 // isLoop records the loop operators. 1718 var isLoop = [maxOp + 1]bool{ 1719 LOOP: true, 1720 LOOPE: true, 1721 LOOPNE: true, 1722 JECXZ: true, 1723 JRCXZ: true, 1724 } 1725