// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Table-driven decoding of x86 instructions.

package x86asm

import (
	"encoding/binary"
	"errors"
	"fmt"
	"runtime"
)

// Set trace to true to cause the decoder to print the PC sequence
// of the executed instruction codes. This is typically only useful
// when you are running a test of a single input case.
const trace = false

// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
//
// TODO(rsc): We may be able to merge various of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
type decodeOp uint16

// NOTE: the iota values below index positional tables elsewhere in this
// package (fixedArg, baseReg, memBytes), so the order of these constants
// must not change.
const (
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)

// instPrefix returns an Inst describing just one prefix byte.
// It is only used if there is a prefix followed by an unintelligible
// or invalid instruction byte sequence.
172 func instPrefix(b byte, mode int) (Inst, error) { 173 // When tracing it is useful to see what called instPrefix to report an error. 174 if trace { 175 _, file, line, _ := runtime.Caller(1) 176 fmt.Printf("%s:%d\n", file, line) 177 } 178 p := Prefix(b) 179 switch p { 180 case PrefixDataSize: 181 if mode == 16 { 182 p = PrefixData32 183 } else { 184 p = PrefixData16 185 } 186 case PrefixAddrSize: 187 if mode == 32 { 188 p = PrefixAddr16 189 } else { 190 p = PrefixAddr32 191 } 192 } 193 // Note: using composite literal with Prefix key confuses 'bundle' tool. 194 inst := Inst{Len: 1} 195 inst.Prefix = Prefixes{p} 196 return inst, nil 197 } 198 199 // truncated reports a truncated instruction. 200 // For now we use instPrefix but perhaps later we will return 201 // a specific error here. 202 func truncated(src []byte, mode int) (Inst, error) { 203 // return Inst{}, len(src), ErrTruncated 204 return instPrefix(src[0], mode) // too long 205 } 206 207 // These are the errors returned by Decode. 208 var ( 209 ErrInvalidMode = errors.New("invalid x86 mode in Decode") 210 ErrTruncated = errors.New("truncated instruction") 211 ErrUnrecognized = errors.New("unrecognized instruction") 212 ) 213 214 // decoderCover records coverage information for which parts 215 // of the byte code have been executed. 216 // TODO(rsc): This is for testing. Only use this if a flag is given. 217 var decoderCover []bool 218 219 // Decode decodes the leading bytes in src as a single instruction. 220 // The mode arguments specifies the assumed processor mode: 221 // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes. 222 func Decode(src []byte, mode int) (inst Inst, err error) { 223 return decode1(src, mode, false) 224 } 225 226 // decode1 is the implementation of Decode but takes an extra 227 // gnuCompat flag to cause it to change its behavior to mimic 228 // bugs (or at least unique features) of GNU libopcodes as used 229 // by objdump. 
We don't believe that logic is the right thing to do 230 // in general, but when testing against libopcodes it simplifies the 231 // comparison if we adjust a few small pieces of logic. 232 // The affected logic is in the conditional branch for "mandatory" prefixes, 233 // case xCondPrefix. 234 func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) { 235 switch mode { 236 case 16, 32, 64: 237 // ok 238 // TODO(rsc): 64-bit mode not tested, probably not working. 239 default: 240 return Inst{}, ErrInvalidMode 241 } 242 243 // Maximum instruction size is 15 bytes. 244 // If we need to read more, return 'truncated instruction. 245 if len(src) > 15 { 246 src = src[:15] 247 } 248 249 var ( 250 // prefix decoding information 251 pos = 0 // position reading src 252 nprefix = 0 // number of prefixes 253 lockIndex = -1 // index of LOCK prefix in src and inst.Prefix 254 repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix 255 segIndex = -1 // index of Group 2 prefix in src and inst.Prefix 256 dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix 257 addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix 258 rex Prefix // rex byte if present (or 0) 259 rexUsed Prefix // bits used in rex byte 260 rexIndex = -1 // index of rex byte 261 262 addrMode = mode // address mode (width in bits) 263 dataMode = mode // operand mode (width in bits) 264 265 // decoded ModR/M fields 266 haveModrm bool 267 modrm int 268 mod int 269 regop int 270 rm int 271 272 // if ModR/M is memory reference, Mem form 273 mem Mem 274 haveMem bool 275 276 // decoded SIB fields 277 haveSIB bool 278 sib int 279 scale int 280 index int 281 base int 282 displen int 283 dispoff int 284 285 // decoded immediate values 286 imm int64 287 imm8 int8 288 immc int64 289 immcpos int 290 291 // output 292 opshift int 293 inst Inst 294 narg int // number of arguments written to inst 295 ) 296 297 if mode == 64 { 298 dataMode = 32 299 } 300 301 // Prefixes are certainly the 
most complex and underspecified part of 302 // decoding x86 instructions. Although the manuals say things like 303 // up to four prefixes, one from each group, nearly everyone seems to 304 // agree that in practice as many prefixes as possible, including multiple 305 // from a particular group or repetitions of a given prefix, can be used on 306 // an instruction, provided the total instruction length including prefixes 307 // does not exceed the agreed-upon maximum of 15 bytes. 308 // Everyone also agrees that if one of these prefixes is the LOCK prefix 309 // and the instruction is not one of the instructions that can be used with 310 // the LOCK prefix or if the destination is not a memory operand, 311 // then the instruction is invalid and produces the #UD exception. 312 // However, that is the end of any semblance of agreement. 313 // 314 // What happens if prefixes are given that conflict with other prefixes? 315 // For example, the memory segment overrides CS, DS, ES, FS, GS, SS 316 // conflict with each other: only one segment can be in effect. 317 // Disassemblers seem to agree that later prefixes take priority over 318 // earlier ones. I have not taken the time to write assembly programs 319 // to check to see if the hardware agrees. 320 // 321 // What happens if prefixes are given that have no meaning for the 322 // specific instruction to which they are attached? It depends. 323 // If they really have no meaning, they are ignored. However, a future 324 // processor may assign a different meaning. As a disassembler, we 325 // don't really know whether we're seeing a meaningless prefix or one 326 // whose meaning we simply haven't been told yet. 327 // 328 // Combining the two questions, what happens when conflicting 329 // extension prefixes are given? No one seems to know for sure. 330 // For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r, 331 // and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'? 332 // Which prefix wins? 
See the xCondPrefix prefix for more. 333 // 334 // Writing assembly test cases to divine which interpretation the 335 // CPU uses might clarify the situation, but more likely it would 336 // make the situation even less clear. 337 338 // Read non-REX prefixes. 339 ReadPrefixes: 340 for ; pos < len(src); pos++ { 341 p := Prefix(src[pos]) 342 switch p { 343 default: 344 nprefix = pos 345 break ReadPrefixes 346 347 // Group 1 - lock and repeat prefixes 348 // According to Intel, there should only be one from this set, 349 // but according to AMD both can be present. 350 case 0xF0: 351 if lockIndex >= 0 { 352 inst.Prefix[lockIndex] |= PrefixIgnored 353 } 354 lockIndex = pos 355 case 0xF2, 0xF3: 356 if repIndex >= 0 { 357 inst.Prefix[repIndex] |= PrefixIgnored 358 } 359 repIndex = pos 360 361 // Group 2 - segment override / branch hints 362 case 0x26, 0x2E, 0x36, 0x3E: 363 if mode == 64 { 364 p |= PrefixIgnored 365 break 366 } 367 fallthrough 368 case 0x64, 0x65: 369 if segIndex >= 0 { 370 inst.Prefix[segIndex] |= PrefixIgnored 371 } 372 segIndex = pos 373 374 // Group 3 - operand size override 375 case 0x66: 376 if mode == 16 { 377 dataMode = 32 378 p = PrefixData32 379 } else { 380 dataMode = 16 381 p = PrefixData16 382 } 383 if dataSizeIndex >= 0 { 384 inst.Prefix[dataSizeIndex] |= PrefixIgnored 385 } 386 dataSizeIndex = pos 387 388 // Group 4 - address size override 389 case 0x67: 390 if mode == 32 { 391 addrMode = 16 392 p = PrefixAddr16 393 } else { 394 addrMode = 32 395 p = PrefixAddr32 396 } 397 if addrSizeIndex >= 0 { 398 inst.Prefix[addrSizeIndex] |= PrefixIgnored 399 } 400 addrSizeIndex = pos 401 } 402 403 if pos >= len(inst.Prefix) { 404 return instPrefix(src[0], mode) // too long 405 } 406 407 inst.Prefix[pos] = p 408 } 409 410 // Read REX prefix. 
411 if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() { 412 rex = Prefix(src[pos]) 413 rexIndex = pos 414 if pos >= len(inst.Prefix) { 415 return instPrefix(src[0], mode) // too long 416 } 417 inst.Prefix[pos] = rex 418 pos++ 419 if rex&PrefixREXW != 0 { 420 dataMode = 64 421 if dataSizeIndex >= 0 { 422 inst.Prefix[dataSizeIndex] |= PrefixIgnored 423 } 424 } 425 } 426 427 // Decode instruction stream, interpreting decoding instructions. 428 // opshift gives the shift to use when saving the next 429 // opcode byte into inst.Opcode. 430 opshift = 24 431 if decoderCover == nil { 432 decoderCover = make([]bool, len(decoder)) 433 } 434 435 // Decode loop, executing decoder program. 436 var oldPC, prevPC int 437 Decode: 438 for pc := 1; ; { // TODO uint 439 oldPC = prevPC 440 prevPC = pc 441 if trace { 442 println("run", pc) 443 } 444 x := decoder[pc] 445 decoderCover[pc] = true 446 pc++ 447 448 // Read and decode ModR/M if needed by opcode. 449 switch decodeOp(x) { 450 case xCondSlashR, xReadSlashR: 451 if haveModrm { 452 return Inst{Len: pos}, errInternal 453 } 454 haveModrm = true 455 if pos >= len(src) { 456 return truncated(src, mode) 457 } 458 modrm = int(src[pos]) 459 pos++ 460 if opshift >= 0 { 461 inst.Opcode |= uint32(modrm) << uint(opshift) 462 opshift -= 8 463 } 464 mod = modrm >> 6 465 regop = (modrm >> 3) & 07 466 rm = modrm & 07 467 if rex&PrefixREXR != 0 { 468 rexUsed |= PrefixREXR 469 regop |= 8 470 } 471 if addrMode == 16 { 472 // 16-bit modrm form 473 if mod != 3 { 474 haveMem = true 475 mem = addr16[rm] 476 if rm == 6 && mod == 0 { 477 mem.Base = 0 478 } 479 480 // Consume disp16 if present. 481 if mod == 0 && rm == 6 || mod == 2 { 482 if pos+2 > len(src) { 483 return truncated(src, mode) 484 } 485 mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:])) 486 pos += 2 487 } 488 489 // Consume disp8 if present. 
490 if mod == 1 { 491 if pos >= len(src) { 492 return truncated(src, mode) 493 } 494 mem.Disp = int64(int8(src[pos])) 495 pos++ 496 } 497 } 498 } else { 499 haveMem = mod != 3 500 501 // 32-bit or 64-bit form 502 // Consume SIB encoding if present. 503 if rm == 4 && mod != 3 { 504 haveSIB = true 505 if pos >= len(src) { 506 return truncated(src, mode) 507 } 508 sib = int(src[pos]) 509 pos++ 510 if opshift >= 0 { 511 inst.Opcode |= uint32(sib) << uint(opshift) 512 opshift -= 8 513 } 514 scale = sib >> 6 515 index = (sib >> 3) & 07 516 base = sib & 07 517 if rex&PrefixREXB != 0 { 518 rexUsed |= PrefixREXB 519 base |= 8 520 } 521 if rex&PrefixREXX != 0 { 522 rexUsed |= PrefixREXX 523 index |= 8 524 } 525 526 mem.Scale = 1 << uint(scale) 527 if index == 4 { 528 // no mem.Index 529 } else { 530 mem.Index = baseRegForBits(addrMode) + Reg(index) 531 } 532 if base&7 == 5 && mod == 0 { 533 // no mem.Base 534 } else { 535 mem.Base = baseRegForBits(addrMode) + Reg(base) 536 } 537 } else { 538 if rex&PrefixREXB != 0 { 539 rexUsed |= PrefixREXB 540 rm |= 8 541 } 542 if mod == 0 && rm&7 == 5 || rm&7 == 4 { 543 // base omitted 544 } else if mod != 3 { 545 mem.Base = baseRegForBits(addrMode) + Reg(rm) 546 } 547 } 548 549 // Consume disp32 if present. 550 if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 { 551 if pos+4 > len(src) { 552 return truncated(src, mode) 553 } 554 dispoff = pos 555 displen = 4 556 mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:])) 557 pos += 4 558 } 559 560 // Consume disp8 if present. 561 if mod == 1 { 562 if pos >= len(src) { 563 return truncated(src, mode) 564 } 565 dispoff = pos 566 displen = 1 567 mem.Disp = int64(int8(src[pos])) 568 pos++ 569 } 570 571 // In 64-bit, mod=0 rm=5 is PC-relative instead of just disp. 572 // See Vol 2A. Table 2-7. 
573 if mode == 64 && mod == 0 && rm&7 == 5 { 574 if addrMode == 32 { 575 mem.Base = EIP 576 } else { 577 mem.Base = RIP 578 } 579 } 580 } 581 582 if segIndex >= 0 { 583 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 584 } 585 } 586 587 // Execute single opcode. 588 switch decodeOp(x) { 589 default: 590 println("bad op", x, "at", pc-1, "from", oldPC) 591 return Inst{Len: pos}, errInternal 592 593 case xFail: 594 inst.Op = 0 595 break Decode 596 597 case xMatch: 598 break Decode 599 600 case xJump: 601 pc = int(decoder[pc]) 602 603 // Conditional branches. 604 605 case xCondByte: 606 if pos >= len(src) { 607 return truncated(src, mode) 608 } 609 b := src[pos] 610 n := int(decoder[pc]) 611 pc++ 612 for i := 0; i < n; i++ { 613 xb, xpc := decoder[pc], int(decoder[pc+1]) 614 pc += 2 615 if b == byte(xb) { 616 pc = xpc 617 pos++ 618 if opshift >= 0 { 619 inst.Opcode |= uint32(b) << uint(opshift) 620 opshift -= 8 621 } 622 continue Decode 623 } 624 } 625 // xCondByte is the only conditional with a fall through, 626 // so that it can be used to pick off special cases before 627 // an xCondSlash. If the fallthrough instruction is xFail, 628 // advance the position so that the decoded instruction 629 // size includes the byte we just compared against. 
630 if decodeOp(decoder[pc]) == xJump { 631 pc = int(decoder[pc+1]) 632 } 633 if decodeOp(decoder[pc]) == xFail { 634 pos++ 635 } 636 637 case xCondIs64: 638 if mode == 64 { 639 pc = int(decoder[pc+1]) 640 } else { 641 pc = int(decoder[pc]) 642 } 643 644 case xCondIsMem: 645 mem := haveMem 646 if !haveModrm { 647 if pos >= len(src) { 648 return instPrefix(src[0], mode) // too long 649 } 650 mem = src[pos]>>6 != 3 651 } 652 if mem { 653 pc = int(decoder[pc+1]) 654 } else { 655 pc = int(decoder[pc]) 656 } 657 658 case xCondDataSize: 659 switch dataMode { 660 case 16: 661 if dataSizeIndex >= 0 { 662 inst.Prefix[dataSizeIndex] |= PrefixImplicit 663 } 664 pc = int(decoder[pc]) 665 case 32: 666 if dataSizeIndex >= 0 { 667 inst.Prefix[dataSizeIndex] |= PrefixImplicit 668 } 669 pc = int(decoder[pc+1]) 670 case 64: 671 rexUsed |= PrefixREXW 672 pc = int(decoder[pc+2]) 673 } 674 675 case xCondAddrSize: 676 switch addrMode { 677 case 16: 678 if addrSizeIndex >= 0 { 679 inst.Prefix[addrSizeIndex] |= PrefixImplicit 680 } 681 pc = int(decoder[pc]) 682 case 32: 683 if addrSizeIndex >= 0 { 684 inst.Prefix[addrSizeIndex] |= PrefixImplicit 685 } 686 pc = int(decoder[pc+1]) 687 case 64: 688 pc = int(decoder[pc+2]) 689 } 690 691 case xCondPrefix: 692 // Conditional branch based on presence or absence of prefixes. 693 // The conflict cases here are completely undocumented and 694 // differ significantly between GNU libopcodes and Intel xed. 695 // I have not written assembly code to divine what various CPUs 696 // do, but it wouldn't surprise me if they are not consistent either. 697 // 698 // The basic idea is to switch on the presence of a prefix, so that 699 // for example: 700 // 701 // xCondPrefix, 4 702 // 0xF3, 123, 703 // 0xF2, 234, 704 // 0x66, 345, 705 // 0, 456 706 // 707 // branch to 123 if the F3 prefix is present, 234 if the F2 prefix 708 // is present, 66 if the 345 prefix is present, and 456 otherwise. 
709 // The prefixes are given in descending order so that the 0 will be last. 710 // 711 // It is unclear what should happen if multiple conditions are 712 // satisfied: what if F2 and F3 are both present, or if 66 and F2 713 // are present, or if all three are present? The one chosen becomes 714 // part of the opcode and the others do not. Perhaps the answer 715 // depends on the specific opcodes in question. 716 // 717 // The only clear example is that CRC32 is F2 0F 38 F1 /r, and 718 // it comes in 16-bit and 32-bit forms based on the 66 prefix, 719 // so 66 F2 0F 38 F1 /r should be treated as F2 taking priority, 720 // with the 66 being only an operand size override, and probably 721 // F2 66 0F 38 F1 /r should be treated the same. 722 // Perhaps that rule is specific to the case of CRC32, since no 723 // 66 0F 38 F1 instruction is defined (today) (that we know of). 724 // However, both libopcodes and xed seem to generalize this 725 // example and choose F2/F3 in preference to 66, and we 726 // do the same. 727 // 728 // Next, what if both F2 and F3 are present? Which wins? 729 // The Intel xed rule, and ours, is that the one that occurs last wins. 730 // The GNU libopcodes rule, which we implement only in gnuCompat mode, 731 // is that F3 beats F2 unless F3 has no special meaning, in which 732 // case F3 can be a modified on an F2 special meaning. 733 // 734 // Concretely, 735 // 66 0F D6 /r is MOVQ 736 // F2 0F D6 /r is MOVDQ2Q 737 // F3 0F D6 /r is MOVQ2DQ. 738 // 739 // F2 66 0F D6 /r is 66 + MOVDQ2Q always. 740 // 66 F2 0F D6 /r is 66 + MOVDQ2Q always. 741 // F3 66 0F D6 /r is 66 + MOVQ2DQ always. 742 // 66 F3 0F D6 /r is 66 + MOVQ2DQ always. 743 // F2 F3 0F D6 /r is F2 + MOVQ2DQ always. 744 // F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes. 745 // Adding 66 anywhere in the prefix section of the 746 // last two cases does not change the outcome. 
747 // 748 // Finally, what if there is a variant in which 66 is a mandatory 749 // prefix rather than an operand size override, but we know of 750 // no corresponding F2/F3 form, and we see both F2/F3 and 66. 751 // Does F2/F3 still take priority, so that the result is an unknown 752 // instruction, or does the 66 take priority, so that the extended 753 // 66 instruction should be interpreted as having a REP/REPN prefix? 754 // Intel xed does the former and GNU libopcodes does the latter. 755 // We side with Intel xed, unless we are trying to match libopcodes 756 // more closely during the comparison-based test suite. 757 // 758 // In 64-bit mode REX.W is another valid prefix to test for, but 759 // there is less ambiguity about that. When present, REX.W is 760 // always the first entry in the table. 761 n := int(decoder[pc]) 762 pc++ 763 sawF3 := false 764 for j := 0; j < n; j++ { 765 prefix := Prefix(decoder[pc+2*j]) 766 if prefix.IsREX() { 767 rexUsed |= prefix 768 if rex&prefix == prefix { 769 pc = int(decoder[pc+2*j+1]) 770 continue Decode 771 } 772 continue 773 } 774 ok := false 775 if prefix == 0 { 776 ok = true 777 } else if prefix.IsREX() { 778 rexUsed |= prefix 779 if rex&prefix == prefix { 780 ok = true 781 } 782 } else { 783 if prefix == 0xF3 { 784 sawF3 = true 785 } 786 switch prefix { 787 case PrefixLOCK: 788 if lockIndex >= 0 { 789 inst.Prefix[lockIndex] |= PrefixImplicit 790 ok = true 791 } 792 case PrefixREP, PrefixREPN: 793 if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix { 794 inst.Prefix[repIndex] |= PrefixImplicit 795 ok = true 796 } 797 if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) { 798 // Check to see if earlier prefix F3 is present. 
799 for i := repIndex - 1; i >= 0; i-- { 800 if inst.Prefix[i]&0xFF == prefix { 801 inst.Prefix[i] |= PrefixImplicit 802 ok = true 803 } 804 } 805 } 806 if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 { 807 // Check to see if earlier prefix F2 is present. 808 for i := repIndex - 1; i >= 0; i-- { 809 if inst.Prefix[i]&0xFF == prefix { 810 inst.Prefix[i] |= PrefixImplicit 811 ok = true 812 } 813 } 814 } 815 case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS: 816 if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix { 817 inst.Prefix[segIndex] |= PrefixImplicit 818 ok = true 819 } 820 case PrefixDataSize: 821 // Looking for 66 mandatory prefix. 822 // The F2/F3 mandatory prefixes take priority when both are present. 823 // If we got this far in the xCondPrefix table and an F2/F3 is present, 824 // it means the table didn't have any entry for that prefix. But if 66 has 825 // special meaning, perhaps F2/F3 have special meaning that we don't know. 826 // Intel xed works this way, treating the F2/F3 as inhibiting the 66. 827 // GNU libopcodes allows the 66 to match. We do what Intel xed does 828 // except in gnuCompat mode. 829 if repIndex >= 0 && !gnuCompat { 830 inst.Op = 0 831 break Decode 832 } 833 if dataSizeIndex >= 0 { 834 inst.Prefix[dataSizeIndex] |= PrefixImplicit 835 ok = true 836 } 837 case PrefixAddrSize: 838 if addrSizeIndex >= 0 { 839 inst.Prefix[addrSizeIndex] |= PrefixImplicit 840 ok = true 841 } 842 } 843 } 844 if ok { 845 pc = int(decoder[pc+2*j+1]) 846 continue Decode 847 } 848 } 849 inst.Op = 0 850 break Decode 851 852 case xCondSlashR: 853 pc = int(decoder[pc+regop&7]) 854 855 // Input. 
856 857 case xReadSlashR: 858 // done above 859 860 case xReadIb: 861 if pos >= len(src) { 862 return truncated(src, mode) 863 } 864 imm8 = int8(src[pos]) 865 pos++ 866 867 case xReadIw: 868 if pos+2 > len(src) { 869 return truncated(src, mode) 870 } 871 imm = int64(binary.LittleEndian.Uint16(src[pos:])) 872 pos += 2 873 874 case xReadId: 875 if pos+4 > len(src) { 876 return truncated(src, mode) 877 } 878 imm = int64(binary.LittleEndian.Uint32(src[pos:])) 879 pos += 4 880 881 case xReadIo: 882 if pos+8 > len(src) { 883 return truncated(src, mode) 884 } 885 imm = int64(binary.LittleEndian.Uint64(src[pos:])) 886 pos += 8 887 888 case xReadCb: 889 if pos >= len(src) { 890 return truncated(src, mode) 891 } 892 immcpos = pos 893 immc = int64(src[pos]) 894 pos++ 895 896 case xReadCw: 897 if pos+2 > len(src) { 898 return truncated(src, mode) 899 } 900 immcpos = pos 901 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 902 pos += 2 903 904 case xReadCm: 905 immcpos = pos 906 if addrMode == 16 { 907 if pos+2 > len(src) { 908 return truncated(src, mode) 909 } 910 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 911 pos += 2 912 } else if addrMode == 32 { 913 if pos+4 > len(src) { 914 return truncated(src, mode) 915 } 916 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 917 pos += 4 918 } else { 919 if pos+8 > len(src) { 920 return truncated(src, mode) 921 } 922 immc = int64(binary.LittleEndian.Uint64(src[pos:])) 923 pos += 8 924 } 925 case xReadCd: 926 immcpos = pos 927 if pos+4 > len(src) { 928 return truncated(src, mode) 929 } 930 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 931 pos += 4 932 933 case xReadCp: 934 immcpos = pos 935 if pos+6 > len(src) { 936 return truncated(src, mode) 937 } 938 w := binary.LittleEndian.Uint32(src[pos:]) 939 w2 := binary.LittleEndian.Uint16(src[pos+4:]) 940 immc = int64(w2)<<32 | int64(w) 941 pos += 6 942 943 // Output. 
944 945 case xSetOp: 946 inst.Op = Op(decoder[pc]) 947 pc++ 948 949 case xArg1, 950 xArg3, 951 xArgAL, 952 xArgAX, 953 xArgCL, 954 xArgCS, 955 xArgDS, 956 xArgDX, 957 xArgEAX, 958 xArgEDX, 959 xArgES, 960 xArgFS, 961 xArgGS, 962 xArgRAX, 963 xArgRDX, 964 xArgSS, 965 xArgST, 966 xArgXMM0: 967 inst.Args[narg] = fixedArg[x] 968 narg++ 969 970 case xArgImm8: 971 inst.Args[narg] = Imm(imm8) 972 narg++ 973 974 case xArgImm8u: 975 inst.Args[narg] = Imm(uint8(imm8)) 976 narg++ 977 978 case xArgImm16: 979 inst.Args[narg] = Imm(int16(imm)) 980 narg++ 981 982 case xArgImm16u: 983 inst.Args[narg] = Imm(uint16(imm)) 984 narg++ 985 986 case xArgImm32: 987 inst.Args[narg] = Imm(int32(imm)) 988 narg++ 989 990 case xArgImm64: 991 inst.Args[narg] = Imm(imm) 992 narg++ 993 994 case xArgM, 995 xArgM128, 996 xArgM1428byte, 997 xArgM16, 998 xArgM16and16, 999 xArgM16and32, 1000 xArgM16and64, 1001 xArgM16colon16, 1002 xArgM16colon32, 1003 xArgM16colon64, 1004 xArgM16int, 1005 xArgM2byte, 1006 xArgM32, 1007 xArgM32and32, 1008 xArgM32fp, 1009 xArgM32int, 1010 xArgM512byte, 1011 xArgM64, 1012 xArgM64fp, 1013 xArgM64int, 1014 xArgM8, 1015 xArgM80bcd, 1016 xArgM80dec, 1017 xArgM80fp, 1018 xArgM94108byte, 1019 xArgMem: 1020 if !haveMem { 1021 inst.Op = 0 1022 break Decode 1023 } 1024 inst.Args[narg] = mem 1025 inst.MemBytes = int(memBytes[decodeOp(x)]) 1026 if mem.Base == RIP { 1027 inst.PCRel = displen 1028 inst.PCRelOff = dispoff 1029 } 1030 narg++ 1031 1032 case xArgPtr16colon16: 1033 inst.Args[narg] = Imm(immc >> 16) 1034 inst.Args[narg+1] = Imm(immc & (1<<16 - 1)) 1035 narg += 2 1036 1037 case xArgPtr16colon32: 1038 inst.Args[narg] = Imm(immc >> 32) 1039 inst.Args[narg+1] = Imm(immc & (1<<32 - 1)) 1040 narg += 2 1041 1042 case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64: 1043 // TODO(rsc): Can address be 64 bits? 
1044 mem = Mem{Disp: immc} 1045 if segIndex >= 0 { 1046 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 1047 inst.Prefix[segIndex] |= PrefixImplicit 1048 } 1049 inst.Args[narg] = mem 1050 inst.MemBytes = int(memBytes[decodeOp(x)]) 1051 if mem.Base == RIP { 1052 inst.PCRel = displen 1053 inst.PCRelOff = dispoff 1054 } 1055 narg++ 1056 1057 case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7: 1058 base := baseReg[x] 1059 index := Reg(regop) 1060 if rex != 0 && base == AL && index >= 4 { 1061 rexUsed |= PrefixREX 1062 index -= 4 1063 base = SPB 1064 } 1065 inst.Args[narg] = base + index 1066 narg++ 1067 1068 case xArgMm, xArgMm1, xArgTR0dashTR7: 1069 inst.Args[narg] = baseReg[x] + Reg(regop&7) 1070 narg++ 1071 1072 case xArgCR0dashCR7: 1073 // AMD documents an extension that the LOCK prefix 1074 // can be used in place of a REX prefix in order to access 1075 // CR8 from 32-bit mode. The LOCK prefix is allowed in 1076 // all modes, provided the corresponding CPUID bit is set. 
1077 if lockIndex >= 0 { 1078 inst.Prefix[lockIndex] |= PrefixImplicit 1079 regop += 8 1080 } 1081 inst.Args[narg] = CR0 + Reg(regop) 1082 narg++ 1083 1084 case xArgSreg: 1085 regop &= 7 1086 if regop >= 6 { 1087 inst.Op = 0 1088 break Decode 1089 } 1090 inst.Args[narg] = ES + Reg(regop) 1091 narg++ 1092 1093 case xArgRmf16, xArgRmf32, xArgRmf64: 1094 base := baseReg[x] 1095 index := Reg(modrm & 07) 1096 if rex&PrefixREXB != 0 { 1097 rexUsed |= PrefixREXB 1098 index += 8 1099 } 1100 inst.Args[narg] = base + index 1101 narg++ 1102 1103 case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi: 1104 n := inst.Opcode >> uint(opshift+8) & 07 1105 base := baseReg[x] 1106 index := Reg(n) 1107 if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi { 1108 rexUsed |= PrefixREXB 1109 index += 8 1110 } 1111 if rex != 0 && base == AL && index >= 4 { 1112 rexUsed |= PrefixREX 1113 index -= 4 1114 base = SPB 1115 } 1116 inst.Args[narg] = base + index 1117 narg++ 1118 1119 case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16, 1120 xArgMmM32, xArgMmM64, xArgMm2M64, 1121 xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128: 1122 if haveMem { 1123 inst.Args[narg] = mem 1124 inst.MemBytes = int(memBytes[decodeOp(x)]) 1125 if mem.Base == RIP { 1126 inst.PCRel = displen 1127 inst.PCRelOff = dispoff 1128 } 1129 } else { 1130 base := baseReg[x] 1131 index := Reg(rm) 1132 switch decodeOp(x) { 1133 case xArgMmM32, xArgMmM64, xArgMm2M64: 1134 // There are only 8 MMX registers, so these ignore the REX.X bit. 
1135 index &= 7 1136 case xArgRM8: 1137 if rex != 0 && index >= 4 { 1138 rexUsed |= PrefixREX 1139 index -= 4 1140 base = SPB 1141 } 1142 } 1143 inst.Args[narg] = base + index 1144 } 1145 narg++ 1146 1147 case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1148 if haveMem { 1149 inst.Op = 0 1150 break Decode 1151 } 1152 inst.Args[narg] = baseReg[x] + Reg(rm&7) 1153 narg++ 1154 1155 case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1156 if haveMem { 1157 inst.Op = 0 1158 break Decode 1159 } 1160 inst.Args[narg] = baseReg[x] + Reg(rm) 1161 narg++ 1162 1163 case xArgRel8: 1164 inst.PCRelOff = immcpos 1165 inst.PCRel = 1 1166 inst.Args[narg] = Rel(int8(immc)) 1167 narg++ 1168 1169 case xArgRel16: 1170 inst.PCRelOff = immcpos 1171 inst.PCRel = 2 1172 inst.Args[narg] = Rel(int16(immc)) 1173 narg++ 1174 1175 case xArgRel32: 1176 inst.PCRelOff = immcpos 1177 inst.PCRel = 4 1178 inst.Args[narg] = Rel(int32(immc)) 1179 narg++ 1180 } 1181 } 1182 1183 if inst.Op == 0 { 1184 // Invalid instruction. 1185 if nprefix > 0 { 1186 return instPrefix(src[0], mode) // invalid instruction 1187 } 1188 return Inst{Len: pos}, ErrUnrecognized 1189 } 1190 1191 // Matched! Hooray! 1192 1193 // 90 decodes as XCHG EAX, EAX but is NOP. 1194 // 66 90 decodes as XCHG AX, AX and is NOP too. 1195 // 48 90 decodes as XCHG RAX, RAX and is NOP too. 1196 // 43 90 decodes as XCHG R8D, EAX and is *not* NOP. 1197 // F3 90 decodes as REP XCHG EAX, EAX but is PAUSE. 1198 // It's all too special to handle in the decoding tables, at least for now. 
1199 if inst.Op == XCHG && inst.Opcode>>24 == 0x90 { 1200 if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX { 1201 inst.Op = NOP 1202 if dataSizeIndex >= 0 { 1203 inst.Prefix[dataSizeIndex] &^= PrefixImplicit 1204 } 1205 inst.Args[0] = nil 1206 inst.Args[1] = nil 1207 } 1208 if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 { 1209 inst.Prefix[repIndex] |= PrefixImplicit 1210 inst.Op = PAUSE 1211 inst.Args[0] = nil 1212 inst.Args[1] = nil 1213 } else if gnuCompat { 1214 for i := nprefix - 1; i >= 0; i-- { 1215 if inst.Prefix[i]&0xFF == 0xF3 { 1216 inst.Prefix[i] |= PrefixImplicit 1217 inst.Op = PAUSE 1218 inst.Args[0] = nil 1219 inst.Args[1] = nil 1220 break 1221 } 1222 } 1223 } 1224 } 1225 1226 // defaultSeg returns the default segment for an implicit 1227 // memory reference: the final override if present, or else DS. 1228 defaultSeg := func() Reg { 1229 if segIndex >= 0 { 1230 inst.Prefix[segIndex] |= PrefixImplicit 1231 return prefixToSegment(inst.Prefix[segIndex]) 1232 } 1233 return DS 1234 } 1235 1236 // Add implicit arguments not present in the tables. 1237 // Normally we shy away from making implicit arguments explicit, 1238 // following the Intel manuals, but adding the arguments seems 1239 // the best way to express the effect of the segment override prefixes. 1240 // TODO(rsc): Perhaps add these to the tables and 1241 // create bytecode instructions for them. 
1242 usedAddrSize := false 1243 switch inst.Op { 1244 case INSB, INSW, INSD: 1245 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1246 inst.Args[1] = DX 1247 usedAddrSize = true 1248 1249 case OUTSB, OUTSW, OUTSD: 1250 inst.Args[0] = DX 1251 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1252 usedAddrSize = true 1253 1254 case MOVSB, MOVSW, MOVSD, MOVSQ: 1255 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1256 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1257 usedAddrSize = true 1258 1259 case CMPSB, CMPSW, CMPSD, CMPSQ: 1260 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1261 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1262 usedAddrSize = true 1263 1264 case LODSB, LODSW, LODSD, LODSQ: 1265 switch inst.Op { 1266 case LODSB: 1267 inst.Args[0] = AL 1268 case LODSW: 1269 inst.Args[0] = AX 1270 case LODSD: 1271 inst.Args[0] = EAX 1272 case LODSQ: 1273 inst.Args[0] = RAX 1274 } 1275 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1276 usedAddrSize = true 1277 1278 case STOSB, STOSW, STOSD, STOSQ: 1279 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1280 switch inst.Op { 1281 case STOSB: 1282 inst.Args[1] = AL 1283 case STOSW: 1284 inst.Args[1] = AX 1285 case STOSD: 1286 inst.Args[1] = EAX 1287 case STOSQ: 1288 inst.Args[1] = RAX 1289 } 1290 usedAddrSize = true 1291 1292 case SCASB, SCASW, SCASD, SCASQ: 1293 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1294 switch inst.Op { 1295 case SCASB: 1296 inst.Args[0] = AL 1297 case SCASW: 1298 inst.Args[0] = AX 1299 case SCASD: 1300 inst.Args[0] = EAX 1301 case SCASQ: 1302 inst.Args[0] = RAX 1303 } 1304 usedAddrSize = true 1305 1306 case XLATB: 1307 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX} 1308 
usedAddrSize = true 1309 } 1310 1311 // If we used the address size annotation to construct the 1312 // argument list, mark that prefix as implicit: it doesn't need 1313 // to be shown when printing the instruction. 1314 if haveMem || usedAddrSize { 1315 if addrSizeIndex >= 0 { 1316 inst.Prefix[addrSizeIndex] |= PrefixImplicit 1317 } 1318 } 1319 1320 // Similarly, if there's some memory operand, the segment 1321 // will be shown there and doesn't need to be shown as an 1322 // explicit prefix. 1323 if haveMem { 1324 if segIndex >= 0 { 1325 inst.Prefix[segIndex] |= PrefixImplicit 1326 } 1327 } 1328 1329 // Branch predict prefixes are overloaded segment prefixes, 1330 // since segment prefixes don't make sense on conditional jumps. 1331 // Rewrite final instance to prediction prefix. 1332 // The set of instructions to which the prefixes apply (other then the 1333 // Jcc conditional jumps) is not 100% clear from the manuals, but 1334 // the disassemblers seem to agree about the LOOP and JCXZ instructions, 1335 // so we'll follow along. 1336 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1337 if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ { 1338 PredictLoop: 1339 for i := nprefix - 1; i >= 0; i-- { 1340 p := inst.Prefix[i] 1341 switch p & 0xFF { 1342 case PrefixCS: 1343 inst.Prefix[i] = PrefixPN 1344 break PredictLoop 1345 case PrefixDS: 1346 inst.Prefix[i] = PrefixPT 1347 break PredictLoop 1348 } 1349 } 1350 } 1351 1352 // The BND prefix is part of the Intel Memory Protection Extensions (MPX). 1353 // A REPN applied to certain control transfers is a BND prefix to bound 1354 // the range of possible destinations. There's surprisingly little documentation 1355 // about this, so we just do what libopcodes and xed agree on. 1356 // In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions 1357 // does not turn into a BND. 
1358 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1359 if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET { 1360 for i := nprefix - 1; i >= 0; i-- { 1361 p := inst.Prefix[i] 1362 if p&^PrefixIgnored == PrefixREPN { 1363 inst.Prefix[i] = PrefixBND 1364 break 1365 } 1366 } 1367 } 1368 1369 // The LOCK prefix only applies to certain instructions, and then only 1370 // to instances of the instruction with a memory destination. 1371 // Other uses of LOCK are invalid and cause a processor exception, 1372 // in contrast to the "just ignore it" spirit applied to all other prefixes. 1373 // Mark invalid lock prefixes. 1374 hasLock := false 1375 if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 { 1376 switch inst.Op { 1377 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1378 case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG: 1379 if isMem(inst.Args[0]) { 1380 hasLock = true 1381 break 1382 } 1383 fallthrough 1384 default: 1385 inst.Prefix[lockIndex] |= PrefixInvalid 1386 } 1387 } 1388 1389 // In certain cases, all of which require a memory destination, 1390 // the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE 1391 // from the Intel Transactional Synchroniation Extensions (TSX). 1392 // 1393 // The specific rules are: 1394 // (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE. 1395 // (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE. 1396 // (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE. 
	// Apply the TSX XACQUIRE/XRELEASE rewriting described in the rules
	// above: only instructions with a memory destination qualify.
	if isMem(inst.Args[0]) {
		// Rule (2): XCHG always carries an implicit LOCK, so it
		// qualifies even without an explicit LOCK prefix.
		if inst.Op == XCHG {
			hasLock = true
		}

		for i := len(inst.Prefix) - 1; i >= 0; i-- {
			p := inst.Prefix[i] &^ PrefixIgnored
			switch p {
			case PrefixREPN:
				// Rule (1)/(2): REPN becomes XACQUIRE on a lockable op.
				if hasLock {
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
				}

			case PrefixREP:
				// Rule (1)/(2): REP becomes XRELEASE on a lockable op.
				if hasLock {
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
				}

				// Rule (3): the 0x88/0x89 and 0xC6/0xC7 MOV opcodes may
				// take XRELEASE (but not XACQUIRE). The &^ 1 folds each
				// even/odd opcode pair; the leading opcode byte lives in
				// the top byte of inst.Opcode.
				if inst.Op == MOV {
					op := (inst.Opcode >> 24) &^ 1
					if op == 0x88 || op == 0xC6 {
						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
					}
				}
			}
		}
	}

	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
	if repIndex >= 0 {
		switch inst.Prefix[repIndex] {
		case PrefixREP, PrefixREPN:
			switch inst.Op {
			// According to the manuals, the REP/REPE prefix applies to all of these,
			// while the REPN applies only to some of them. However, both libopcodes
			// and xed show both prefixes explicitly for all instructions, so we do the same.
			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
			case INSB, INSW, INSD,
				MOVSB, MOVSW, MOVSD, MOVSQ,
				OUTSB, OUTSW, OUTSD,
				LODSB, LODSW, LODSD, LODSQ,
				CMPSB, CMPSW, CMPSD, CMPSQ,
				SCASB, SCASW, SCASD, SCASQ,
				STOSB, STOSW, STOSD, STOSQ:
				// ok
			default:
				inst.Prefix[repIndex] |= PrefixIgnored
			}
		}
	}

	// If REX was present, mark implicit if all the 1 bits were consumed.
	if rexIndex >= 0 {
		if rexUsed != 0 {
			// If any extension bit was consumed, count the REX base
			// bit itself as consumed too.
			rexUsed |= PrefixREX
		}
		if rex&^rexUsed == 0 {
			inst.Prefix[rexIndex] |= PrefixImplicit
		}
	}

	// Record the decoding modes and the total number of bytes consumed.
	inst.DataSize = dataMode
	inst.AddrSize = addrMode
	inst.Mode = mode
	inst.Len = pos
	return inst, nil
}

// errInternal reports an inconsistency in the decoder itself
// (as opposed to invalid input). Its uses are not visible in this
// chunk of the file.
var errInternal = errors.New("internal error")

// addr16 records the eight 16-bit addressing modes.
var addr16 = [8]Mem{
	// Indexed presumably by the 3-bit ModR/M r/m field in 16-bit address
	// mode (Intel SDM Vol. 2, Table 2-1) — the use site is not visible in
	// this chunk; confirm against the caller.
	{Base: BX, Scale: 1, Index: SI},
	{Base: BX, Scale: 1, Index: DI},
	{Base: BP, Scale: 1, Index: SI},
	{Base: BP, Scale: 1, Index: DI},
	{Base: SI},
	{Base: DI},
	{Base: BP},
	{Base: BX},
}

// baseRegForBits returns the base register for a given register size
// in bits: AL, AX, EAX, or RAX for 8, 16, 32, or 64 bits.
// It returns 0 for any other size.
func baseRegForBits(bits int) Reg {
	switch bits {
	case 8:
		return AL
	case 16:
		return AX
	case 32:
		return EAX
	case 64:
		return RAX
	}
	return 0
}

// baseReg records the base register for argument types that specify
// a range of registers indexed by op, regop, or rm.
// It is indexed by decodeOp value; entries not listed are the zero Reg.
var baseReg = [...]Reg{
	xArgDR0dashDR7: DR0,
	xArgMm1:        M0,
	xArgMm2:        M0,
	xArgMm2M64:     M0,
	xArgMm:         M0,
	xArgMmM32:      M0,
	xArgMmM64:      M0,
	xArgR16:        AX,
	xArgR16op:      AX,
	xArgR32:        EAX,
	xArgR32M16:     EAX,
	xArgR32M8:      EAX,
	xArgR32op:      EAX,
	xArgR64:        RAX,
	xArgR64M16:     RAX,
	xArgR64op:      RAX,
	xArgR8:         AL,
	xArgR8op:       AL,
	xArgRM16:       AX,
	xArgRM32:       EAX,
	xArgRM64:       RAX,
	xArgRM8:        AL,
	xArgRmf16:      AX,
	xArgRmf32:      EAX,
	xArgRmf64:      RAX,
	xArgSTi:        F0,
	xArgTR0dashTR7: TR0,
	xArgXmm1:       X0,
	xArgXmm2:       X0,
	xArgXmm2M128:   X0,
	xArgXmm2M16:    X0,
	xArgXmm2M32:    X0,
	xArgXmm2M64:    X0,
	xArgXmm:        X0,
	xArgXmmM128:    X0,
	xArgXmmM32:     X0,
	xArgXmmM64:     X0,
}

// prefixToSegment returns the segment register
// corresponding to a particular segment prefix.
// It strips PrefixImplicit before comparing and returns 0
// for a prefix that is not a segment override.
func prefixToSegment(p Prefix) Reg {
	switch p &^ PrefixImplicit {
	case PrefixCS:
		return CS
	case PrefixDS:
		return DS
	case PrefixES:
		return ES
	case PrefixFS:
		return FS
	case PrefixGS:
		return GS
	case PrefixSS:
		return SS
	}
	return 0
}

// fixedArg records the fixed arguments corresponding to the given bytecodes.
var fixedArg = [...]Arg{
	// Indexed by decodeOp value; entries not listed are nil.
	xArg1:    Imm(1),
	xArg3:    Imm(3),
	xArgAL:   AL,
	xArgAX:   AX,
	xArgDX:   DX,
	xArgEAX:  EAX,
	xArgEDX:  EDX,
	xArgRAX:  RAX,
	xArgRDX:  RDX,
	xArgCL:   CL,
	xArgCS:   CS,
	xArgDS:   DS,
	xArgES:   ES,
	xArgFS:   FS,
	xArgGS:   GS,
	xArgSS:   SS,
	xArgST:   F0,
	xArgXMM0: X0,
}

// memBytes records the size of the memory pointed at
// by a memory argument of the given form.
// Indexed by decodeOp value; sizes are in bytes, and entries
// not listed are 0 (size unknown or not applicable).
var memBytes = [...]int8{
	xArgM128:       128 / 8,
	xArgM16:        16 / 8,
	xArgM16and16:   (16 + 16) / 8,
	xArgM16colon16: (16 + 16) / 8,
	xArgM16colon32: (16 + 32) / 8,
	xArgM16int:     16 / 8,
	xArgM2byte:     2,
	xArgM32:        32 / 8,
	xArgM32and32:   (32 + 32) / 8,
	xArgM32fp:      32 / 8,
	xArgM32int:     32 / 8,
	xArgM64:        64 / 8,
	xArgM64fp:      64 / 8,
	xArgM64int:     64 / 8,
	xArgMm2M64:     64 / 8,
	xArgMmM32:      32 / 8,
	xArgMmM64:      64 / 8,
	xArgMoffs16:    16 / 8,
	xArgMoffs32:    32 / 8,
	xArgMoffs64:    64 / 8,
	xArgMoffs8:     8 / 8,
	xArgR32M16:     16 / 8,
	xArgR32M8:      8 / 8,
	xArgR64M16:     16 / 8,
	xArgRM16:       16 / 8,
	xArgRM32:       32 / 8,
	xArgRM64:       64 / 8,
	xArgRM8:        8 / 8,
	xArgXmm2M128:   128 / 8,
	xArgXmm2M16:    16 / 8,
	xArgXmm2M32:    32 / 8,
	xArgXmm2M64:    64 / 8,
	xArgXmm:        128 / 8,
	xArgXmmM128:    128 / 8,
	xArgXmmM32:     32 / 8,
	xArgXmmM64:     64 / 8,
}

// isCondJmp records the conditional jumps.
// Indexed by Op; used for the branch-prediction and BND prefix
// rewriting passes earlier in this file.
var isCondJmp = [maxOp + 1]bool{
	JA:  true,
	JAE: true,
	JB:  true,
	JBE: true,
	JE:  true,
	JG:  true,
	JGE: true,
	JL:  true,
	JLE: true,
	JNE: true,
	JNO: true,
	JNP: true,
	JNS: true,
	JO:  true,
	JP:  true,
	JS:  true,
}

// isLoop records the loop operators.
// NOTE(review): JCXZ is not listed here even though JECXZ and JRCXZ are;
// the branch-prediction check earlier tests JCXZ explicitly, so the
// omission matters only to other users of this table — confirm intent.
var isLoop = [maxOp + 1]bool{
	LOOP:   true,
	LOOPE:  true,
	LOOPNE: true,
	JECXZ:  true,
	JRCXZ:  true,
}