// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Table-driven decoding of x86 instructions.

package x86asm

import (
	"encoding/binary"
	"errors"
	"fmt"
	"runtime"
)

// Set trace to true to cause the decoder to print the PC sequence
// of the executed instruction codes. This is typically only useful
// when you are running a test of a single input case.
const trace = false

// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
//
// TODO(rsc): We may be able to merge some of the memory operand forms,
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.

type decodeOp uint16

const (
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)

// instPrefix returns an Inst describing just one prefix byte.
// It is only used if there is a prefix followed by an unintelligible
// or invalid instruction byte sequence.
func instPrefix(b byte, mode int) (Inst, error) {
	// When tracing it is useful to see what called instPrefix to report an error.
	if trace {
		_, file, line, _ := runtime.Caller(1)
		fmt.Printf("%s:%d\n", file, line)
	}
	p := Prefix(b)
	switch p {
	case PrefixDataSize:
		if mode == 16 {
			p = PrefixData32
		} else {
			p = PrefixData16
		}
	case PrefixAddrSize:
		if mode == 32 {
			p = PrefixAddr16
		} else {
			p = PrefixAddr32
		}
	}
	// Note: using composite literal with Prefix key confuses 'bundle' tool.
	inst := Inst{Len: 1}
	inst.Prefix = Prefixes{p}
	return inst, nil
}

// truncated reports a truncated instruction.
// For now we use instPrefix but perhaps later we will return
// a specific error here.
func truncated(src []byte, mode int) (Inst, error) {
	// return Inst{}, len(src), ErrTruncated
	return instPrefix(src[0], mode) // too long
}

// These are the errors returned by Decode.
var (
	ErrInvalidMode  = errors.New("invalid x86 mode in Decode")
	ErrTruncated    = errors.New("truncated instruction")
	ErrUnrecognized = errors.New("unrecognized instruction")
)

// decoderCover records coverage information for which parts
// of the byte code have been executed.
// TODO(rsc): This is for testing. Only use this if a flag is given.
var decoderCover []bool

// Decode decodes the leading bytes in src as a single instruction.
// The mode argument specifies the assumed processor mode:
// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
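//
// A minimal usage sketch (illustrative only, not part of the decoder):
//
//	inst, err := Decode([]byte{0x48, 0x01, 0xD8}, 64) // REX.W + ADD r/m64, r64
//	if err != nil {
//		// err is ErrInvalidMode, ErrTruncated, or ErrUnrecognized.
//	}
//	fmt.Println(inst) // expected to print an ADD with RAX and RBX operands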
func Decode(src []byte, mode int) (inst Inst, err error) {
	return decode1(src, mode, false)
}

// decode1 is the implementation of Decode but takes an extra
// gnuCompat flag to cause it to change its behavior to mimic
// bugs (or at least unique features) of GNU libopcodes as used
// by objdump. We don't believe that logic is the right thing to do
// in general, but when testing against libopcodes it simplifies the
// comparison if we adjust a few small pieces of logic.
// The affected logic is in the conditional branch for "mandatory" prefixes,
// case xCondPrefix.
func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
	switch mode {
	case 16, 32, 64:
		// ok
		// TODO(rsc): 64-bit mode not tested, probably not working.
	default:
		return Inst{}, ErrInvalidMode
	}

	// Maximum instruction size is 15 bytes.
	// If we need to read more, return 'truncated instruction'.
	if len(src) > 15 {
		src = src[:15]
	}

	var (
		// prefix decoding information
		pos           = 0      // position reading src
		nprefix       = 0      // number of prefixes
		lockIndex     = -1     // index of LOCK prefix in src and inst.Prefix
		repIndex      = -1     // index of REP/REPN prefix in src and inst.Prefix
		segIndex      = -1     // index of Group 2 prefix in src and inst.Prefix
		dataSizeIndex = -1     // index of Group 3 prefix in src and inst.Prefix
		addrSizeIndex = -1     // index of Group 4 prefix in src and inst.Prefix
		rex           Prefix   // rex byte if present (or 0)
		rexUsed       Prefix   // bits used in rex byte
		rexIndex      = -1     // index of rex byte

		addrMode = mode // address mode (width in bits)
		dataMode = mode // operand mode (width in bits)

		// decoded ModR/M fields
		haveModrm bool
		modrm     int
		mod       int
		regop     int
		rm        int

		// if ModR/M is memory reference, Mem form
		mem     Mem
		haveMem bool

		// decoded SIB fields
		haveSIB bool
		sib     int
		scale   int
		index   int
		base    int

		// decoded immediate values
		imm  int64
		imm8 int8
		immc int64

		// output
		opshift int
		inst    Inst
		narg    int // number of arguments written to inst
	)

	if mode == 64 {
		dataMode = 32
	}

	// Prefixes are certainly the most complex and underspecified part of
	// decoding x86 instructions. Although the manuals say things like
	// up to four prefixes, one from each group, nearly everyone seems to
	// agree that in practice as many prefixes as possible, including multiple
	// from a particular group or repetitions of a given prefix, can be used on
	// an instruction, provided the total instruction length including prefixes
	// does not exceed the agreed-upon maximum of 15 bytes.
	// Everyone also agrees that if one of these prefixes is the LOCK prefix
	// and the instruction is not one of the instructions that can be used with
	// the LOCK prefix or if the destination is not a memory operand,
	// then the instruction is invalid and produces the #UD exception.
	// However, that is the end of any semblance of agreement.
	//
	// What happens if prefixes are given that conflict with other prefixes?
	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
	// conflict with each other: only one segment can be in effect.
	// Disassemblers seem to agree that later prefixes take priority over
	// earlier ones. I have not taken the time to write assembly programs
	// to check to see if the hardware agrees.
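	// For instance, 2E 36 8B 03 in 32-bit mode carries both a CS and an
	// SS override on MOV EAX, [EBX]; the prefix loop below keeps the later
	// SS override and marks the earlier CS prefix as PrefixIgnored.
	// (Illustrative byte sequence of our own choosing, not from the manuals.)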
	//
	// What happens if prefixes are given that have no meaning for the
	// specific instruction to which they are attached? It depends.
	// If they really have no meaning, they are ignored. However, a future
	// processor may assign a different meaning. As a disassembler, we
	// don't really know whether we're seeing a meaningless prefix or one
	// whose meaning we simply haven't been told yet.
	//
	// Combining the two questions, what happens when conflicting
	// extension prefixes are given? No one seems to know for sure.
	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
	// Which prefix wins? See the xCondPrefix case below for more.
	//
	// Writing assembly test cases to divine which interpretation the
	// CPU uses might clarify the situation, but more likely it would
	// make the situation even less clear.

	// Read non-REX prefixes.
ReadPrefixes:
	for ; pos < len(src); pos++ {
		p := Prefix(src[pos])
		switch p {
		default:
			nprefix = pos
			break ReadPrefixes

		// Group 1 - lock and repeat prefixes
		// According to Intel, there should only be one from this set,
		// but according to AMD both can be present.
		case 0xF0:
			if lockIndex >= 0 {
				inst.Prefix[lockIndex] |= PrefixIgnored
			}
			lockIndex = pos
		case 0xF2, 0xF3:
			if repIndex >= 0 {
				inst.Prefix[repIndex] |= PrefixIgnored
			}
			repIndex = pos

		// Group 2 - segment override / branch hints
		case 0x26, 0x2E, 0x36, 0x3E:
			if mode == 64 {
				p |= PrefixIgnored
				break
			}
			fallthrough
		case 0x64, 0x65:
			if segIndex >= 0 {
				inst.Prefix[segIndex] |= PrefixIgnored
			}
			segIndex = pos

		// Group 3 - operand size override
		case 0x66:
			if mode == 16 {
				dataMode = 32
				p = PrefixData32
			} else {
				dataMode = 16
				p = PrefixData16
			}
			if dataSizeIndex >= 0 {
				inst.Prefix[dataSizeIndex] |= PrefixIgnored
			}
			dataSizeIndex = pos

		// Group 4 - address size override
		case 0x67:
			if mode == 32 {
				addrMode = 16
				p = PrefixAddr16
			} else {
				addrMode = 32
				p = PrefixAddr32
			}
			if addrSizeIndex >= 0 {
				inst.Prefix[addrSizeIndex] |= PrefixIgnored
			}
			addrSizeIndex = pos
		}

		if pos >= len(inst.Prefix) {
			return instPrefix(src[0], mode) // too long
		}

		inst.Prefix[pos] = p
	}

	// Read REX prefix.
	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() {
		rex = Prefix(src[pos])
		rexIndex = pos
		if pos >= len(inst.Prefix) {
			return instPrefix(src[0], mode) // too long
		}
		inst.Prefix[pos] = rex
		pos++
		if rex&PrefixREXW != 0 {
			dataMode = 64
			if dataSizeIndex >= 0 {
				inst.Prefix[dataSizeIndex] |= PrefixIgnored
			}
		}
	}

	// Decode instruction stream, interpreting decoding instructions.
	// opshift gives the shift to use when saving the next
	// opcode byte into inst.Opcode.
	opshift = 24
	if decoderCover == nil {
		decoderCover = make([]bool, len(decoder))
	}

	// Decode loop, executing decoder program.
	var oldPC, prevPC int
Decode:
	for pc := 1; ; { // TODO uint
		oldPC = prevPC
		prevPC = pc
		if trace {
			println("run", pc)
		}
		x := decoder[pc]
		decoderCover[pc] = true
		pc++

		// Read and decode ModR/M if needed by opcode.
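		// (ModR/M bit layout, for reference: mod in bits 7:6, reg/opcode in
		// bits 5:3, r/m in bits 2:0. A SIB byte, when present, splits the
		// same way into scale, index, and base. The shifts and masks in the
		// xCondSlashR/xReadSlashR case below extract exactly these fields.)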
		switch decodeOp(x) {
		case xCondSlashR, xReadSlashR:
			if haveModrm {
				return Inst{Len: pos}, errInternal
			}
			haveModrm = true
			if pos >= len(src) {
				return truncated(src, mode)
			}
			modrm = int(src[pos])
			pos++
			if opshift >= 0 {
				inst.Opcode |= uint32(modrm) << uint(opshift)
				opshift -= 8
			}
			mod = modrm >> 6
			regop = (modrm >> 3) & 07
			rm = modrm & 07
			if rex&PrefixREXR != 0 {
				rexUsed |= PrefixREXR
				regop |= 8
			}
			if addrMode == 16 {
				// 16-bit modrm form
				if mod != 3 {
					haveMem = true
					mem = addr16[rm]
					if rm == 6 && mod == 0 {
						mem.Base = 0
					}

					// Consume disp16 if present.
					if mod == 0 && rm == 6 || mod == 2 {
						if pos+2 > len(src) {
							return truncated(src, mode)
						}
						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
						pos += 2
					}

					// Consume disp8 if present.
					if mod == 1 {
						if pos >= len(src) {
							return truncated(src, mode)
						}
						mem.Disp = int64(int8(src[pos]))
						pos++
					}
				}
			} else {
				haveMem = mod != 3

				// 32-bit or 64-bit form
				// Consume SIB encoding if present.
				if rm == 4 && mod != 3 {
					haveSIB = true
					if pos >= len(src) {
						return truncated(src, mode)
					}
					sib = int(src[pos])
					pos++
					if opshift >= 0 {
						inst.Opcode |= uint32(sib) << uint(opshift)
						opshift -= 8
					}
					scale = sib >> 6
					index = (sib >> 3) & 07
					base = sib & 07
					if rex&PrefixREXB != 0 {
						rexUsed |= PrefixREXB
						base |= 8
					}
					if rex&PrefixREXX != 0 {
						rexUsed |= PrefixREXX
						index |= 8
					}

					mem.Scale = 1 << uint(scale)
					if index == 4 {
						// no mem.Index
					} else {
						mem.Index = baseRegForBits(addrMode) + Reg(index)
					}
					if base&7 == 5 && mod == 0 {
						// no mem.Base
					} else {
						mem.Base = baseRegForBits(addrMode) + Reg(base)
					}
				} else {
					if rex&PrefixREXB != 0 {
						rexUsed |= PrefixREXB
						rm |= 8
					}
					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
						// base omitted
					} else if mod != 3 {
						mem.Base = baseRegForBits(addrMode) + Reg(rm)
					}
				}

				// Consume disp32 if present.
				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
					if pos+4 > len(src) {
						return truncated(src, mode)
					}
					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
					pos += 4
				}

				// Consume disp8 if present.
				if mod == 1 {
					if pos >= len(src) {
						return truncated(src, mode)
					}
					mem.Disp = int64(int8(src[pos]))
					pos++
				}

				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
				// See Vol 2A. Table 2-7.
				if mode == 64 && mod == 0 && rm&7 == 5 {
					if addrMode == 32 {
						mem.Base = EIP
					} else {
						mem.Base = RIP
					}
				}
			}

			if segIndex >= 0 {
				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
			}
		}

		// Execute single opcode.
		switch decodeOp(x) {
		default:
			println("bad op", x, "at", pc-1, "from", oldPC)
			return Inst{Len: pos}, errInternal

		case xFail:
			inst.Op = 0
			break Decode

		case xMatch:
			break Decode

		case xJump:
			pc = int(decoder[pc])

		// Conditional branches.
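		// (Table layout, for reference: a two-way conditional such as
		// xCondIs64 or xCondIsMem is followed by two decoder entries, the
		// "false" target and then the "true" target; xCondDataSize and
		// xCondAddrSize have three targets, for 16, 32, and 64 bits.
		// xCondByte is followed by a count n and then n (byte, target pc)
		// pairs, and xCondPrefix by a count n and then n (prefix, target pc)
		// pairs. The cases below index into decoder accordingly.)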

		case xCondByte:
			if pos >= len(src) {
				return truncated(src, mode)
			}
			b := src[pos]
			n := int(decoder[pc])
			pc++
			for i := 0; i < n; i++ {
				xb, xpc := decoder[pc], int(decoder[pc+1])
				pc += 2
				if b == byte(xb) {
					pc = xpc
					pos++
					if opshift >= 0 {
						inst.Opcode |= uint32(b) << uint(opshift)
						opshift -= 8
					}
					continue Decode
				}
			}
			// xCondByte is the only conditional with a fall through,
			// so that it can be used to pick off special cases before
			// an xCondSlashR. If the fallthrough instruction is xFail,
			// advance the position so that the decoded instruction
			// size includes the byte we just compared against.
			if decodeOp(decoder[pc]) == xJump {
				pc = int(decoder[pc+1])
			}
			if decodeOp(decoder[pc]) == xFail {
				pos++
			}

		case xCondIs64:
			if mode == 64 {
				pc = int(decoder[pc+1])
			} else {
				pc = int(decoder[pc])
			}

		case xCondIsMem:
			mem := haveMem
			if !haveModrm {
				if pos >= len(src) {
					return instPrefix(src[0], mode) // too long
				}
				mem = src[pos]>>6 != 3
			}
			if mem {
				pc = int(decoder[pc+1])
			} else {
				pc = int(decoder[pc])
			}

		case xCondDataSize:
			switch dataMode {
			case 16:
				if dataSizeIndex >= 0 {
					inst.Prefix[dataSizeIndex] |= PrefixImplicit
				}
				pc = int(decoder[pc])
			case 32:
				if dataSizeIndex >= 0 {
					inst.Prefix[dataSizeIndex] |= PrefixImplicit
				}
				pc = int(decoder[pc+1])
			case 64:
				rexUsed |= PrefixREXW
				pc = int(decoder[pc+2])
			}

		case xCondAddrSize:
			switch addrMode {
			case 16:
				if addrSizeIndex >= 0 {
					inst.Prefix[addrSizeIndex] |= PrefixImplicit
				}
				pc = int(decoder[pc])
			case 32:
				if addrSizeIndex >= 0 {
					inst.Prefix[addrSizeIndex] |= PrefixImplicit
				}
				pc = int(decoder[pc+1])
			case 64:
				pc = int(decoder[pc+2])
			}

		case xCondPrefix:
			// Conditional branch based on presence or absence of prefixes.
			// The conflict cases here are completely undocumented and
			// differ significantly between GNU libopcodes and Intel xed.
			// I have not written assembly code to divine what various CPUs
			// do, but it wouldn't surprise me if they are not consistent either.
			//
			// The basic idea is to switch on the presence of a prefix, so that
			// for example:
			//
			// xCondPrefix, 4
			// 0xF3, 123,
			// 0xF2, 234,
			// 0x66, 345,
			// 0, 456
			//
			// branches to 123 if the F3 prefix is present, 234 if the F2 prefix
			// is present, 345 if the 66 prefix is present, and 456 otherwise.
			// The prefixes are given in descending order so that the 0 will be last.
			//
			// It is unclear what should happen if multiple conditions are
			// satisfied: what if F2 and F3 are both present, or if 66 and F2
			// are present, or if all three are present? The one chosen becomes
			// part of the opcode and the others do not. Perhaps the answer
			// depends on the specific opcodes in question.
			//
			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
			// with the 66 being only an operand size override, and probably
			// F2 66 0F 38 F1 /r should be treated the same.
			// Perhaps that rule is specific to the case of CRC32, since no
			// 66 0F 38 F1 instruction is defined (today) (that we know of).
			// However, both libopcodes and xed seem to generalize this
			// example and choose F2/F3 in preference to 66, and we
			// do the same.
			//
			// Next, what if both F2 and F3 are present? Which wins?
			// The Intel xed rule, and ours, is that the one that occurs last wins.
			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
			// is that F3 beats F2 unless F3 has no special meaning, in which
			// case F3 can be a modifier on an F2 special meaning.
			//
			// Concretely,
			// 66 0F D6 /r is MOVQ
			// F2 0F D6 /r is MOVDQ2Q
			// F3 0F D6 /r is MOVQ2DQ.
			//
			// F2 66 0F D6 /r is 66 + MOVDQ2Q always.
			// 66 F2 0F D6 /r is 66 + MOVDQ2Q always.
			// F3 66 0F D6 /r is 66 + MOVQ2DQ always.
			// 66 F3 0F D6 /r is 66 + MOVQ2DQ always.
			// F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
			// F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
			// Adding 66 anywhere in the prefix section of the
			// last two cases does not change the outcome.
			//
			// Finally, what if there is a variant in which 66 is a mandatory
			// prefix rather than an operand size override, but we know of
			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
			// Does F2/F3 still take priority, so that the result is an unknown
			// instruction, or does the 66 take priority, so that the extended
			// 66 instruction should be interpreted as having a REP/REPN prefix?
			// Intel xed does the former and GNU libopcodes does the latter.
			// We side with Intel xed, unless we are trying to match libopcodes
			// more closely during the comparison-based test suite.
			//
			// In 64-bit mode REX.W is another valid prefix to test for, but
			// there is less ambiguity about that. When present, REX.W is
			// always the first entry in the table.
			n := int(decoder[pc])
			pc++
			sawF3 := false
			for j := 0; j < n; j++ {
				prefix := Prefix(decoder[pc+2*j])
				if prefix.IsREX() {
					rexUsed |= prefix
					if rex&prefix == prefix {
						pc = int(decoder[pc+2*j+1])
						continue Decode
					}
					continue
				}
				ok := false
				if prefix == 0 {
					ok = true
				} else if prefix.IsREX() {
					rexUsed |= prefix
					if rex&prefix == prefix {
						ok = true
					}
				} else {
					if prefix == 0xF3 {
						sawF3 = true
					}
					switch prefix {
					case PrefixLOCK:
						if lockIndex >= 0 {
							inst.Prefix[lockIndex] |= PrefixImplicit
							ok = true
						}
					case PrefixREP, PrefixREPN:
						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
							inst.Prefix[repIndex] |= PrefixImplicit
							ok = true
						}
						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
							// Check to see if earlier prefix F3 is present.
							for i := repIndex - 1; i >= 0; i-- {
								if inst.Prefix[i]&0xFF == prefix {
									inst.Prefix[i] |= PrefixImplicit
									ok = true
								}
							}
						}
						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
							// Check to see if earlier prefix F2 is present.
							for i := repIndex - 1; i >= 0; i-- {
								if inst.Prefix[i]&0xFF == prefix {
									inst.Prefix[i] |= PrefixImplicit
									ok = true
								}
							}
						}
					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
							inst.Prefix[segIndex] |= PrefixImplicit
							ok = true
						}
					case PrefixDataSize:
						// Looking for 66 mandatory prefix.
						// The F2/F3 mandatory prefixes take priority when both are present.
						// If we got this far in the xCondPrefix table and an F2/F3 is present,
						// it means the table didn't have any entry for that prefix. But if 66 has
						// special meaning, perhaps F2/F3 have special meaning that we don't know.
						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
						// GNU libopcodes allows the 66 to match. We do what Intel xed does
						// except in gnuCompat mode.
						if repIndex >= 0 && !gnuCompat {
							inst.Op = 0
							break Decode
						}
						if dataSizeIndex >= 0 {
							inst.Prefix[dataSizeIndex] |= PrefixImplicit
							ok = true
						}
					case PrefixAddrSize:
						if addrSizeIndex >= 0 {
							inst.Prefix[addrSizeIndex] |= PrefixImplicit
							ok = true
						}
					}
				}
				if ok {
					pc = int(decoder[pc+2*j+1])
					continue Decode
				}
			}
			inst.Op = 0
			break Decode

		case xCondSlashR:
			pc = int(decoder[pc+regop&7])

		// Input.

		case xReadSlashR:
			// done above

		case xReadIb:
			if pos >= len(src) {
				return truncated(src, mode)
			}
			imm8 = int8(src[pos])
			pos++

		case xReadIw:
			if pos+2 > len(src) {
				return truncated(src, mode)
			}
			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
			pos += 2

		case xReadId:
			if pos+4 > len(src) {
				return truncated(src, mode)
			}
			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
			pos += 4

		case xReadIo:
			if pos+8 > len(src) {
				return truncated(src, mode)
			}
			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
			pos += 8

		case xReadCb:
			if pos >= len(src) {
				return truncated(src, mode)
			}
			immc = int64(src[pos])
			pos++

		case xReadCw:
			if pos+2 > len(src) {
				return truncated(src, mode)
			}
			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
			pos += 2

		case xReadCm:
			if addrMode == 16 {
				if pos+2 > len(src) {
					return truncated(src, mode)
				}
				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
				pos += 2
			} else if addrMode == 32 {
				if pos+4 > len(src) {
					return truncated(src, mode)
				}
				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
				pos += 4
			} else {
				if pos+8 > len(src) {
					return truncated(src, mode)
				}
				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
				pos += 8
			}

		case xReadCd:
			if pos+4 > len(src) {
				return truncated(src, mode)
			}
			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
			pos += 4

		case xReadCp:
			if pos+6 > len(src) {
				return truncated(src, mode)
			}
			w := binary.LittleEndian.Uint32(src[pos:])
			w2 := binary.LittleEndian.Uint16(src[pos+4:])
			immc = int64(w2)<<32 | int64(w)
			pos += 6

		// Output.

		case xSetOp:
			inst.Op = Op(decoder[pc])
			pc++

		case xArg1,
			xArg3,
			xArgAL,
			xArgAX,
			xArgCL,
			xArgCS,
			xArgDS,
			xArgDX,
			xArgEAX,
			xArgEDX,
			xArgES,
			xArgFS,
			xArgGS,
			xArgRAX,
			xArgRDX,
			xArgSS,
			xArgST,
			xArgXMM0:
			inst.Args[narg] = fixedArg[x]
			narg++

		case xArgImm8:
			inst.Args[narg] = Imm(imm8)
			narg++

		case xArgImm8u:
			inst.Args[narg] = Imm(uint8(imm8))
			narg++

		case xArgImm16:
			inst.Args[narg] = Imm(int16(imm))
			narg++

		case xArgImm16u:
			inst.Args[narg] = Imm(uint16(imm))
			narg++

		case xArgImm32:
			inst.Args[narg] = Imm(int32(imm))
			narg++

		case xArgImm64:
			inst.Args[narg] = Imm(imm)
			narg++

		case xArgM,
			xArgM128,
			xArgM1428byte,
			xArgM16,
			xArgM16and16,
			xArgM16and32,
			xArgM16and64,
			xArgM16colon16,
			xArgM16colon32,
			xArgM16colon64,
			xArgM16int,
			xArgM2byte,
			xArgM32,
			xArgM32and32,
			xArgM32fp,
			xArgM32int,
			xArgM512byte,
			xArgM64,
			xArgM64fp,
			xArgM64int,
			xArgM8,
			xArgM80bcd,
			xArgM80dec,
			xArgM80fp,
			xArgM94108byte,
			xArgMem:
			if !haveMem {
				inst.Op = 0
				break Decode
			}
			inst.Args[narg] = mem
			inst.MemBytes = int(memBytes[decodeOp(x)])
			narg++

		case xArgPtr16colon16:
			inst.Args[narg] = Imm(immc >> 16)
			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
			narg += 2

		case xArgPtr16colon32:
			inst.Args[narg] = Imm(immc >> 32)
			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
			narg += 2

		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
			// TODO(rsc): Can address be 64 bits?
			mem = Mem{Disp: int64(immc)}
			if segIndex >= 0 {
				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
				inst.Prefix[segIndex] |= PrefixImplicit
			}
			inst.Args[narg] = mem
			inst.MemBytes = int(memBytes[decodeOp(x)])
			narg++

		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
			base := baseReg[x]
			index := Reg(regop)
			if rex != 0 && base == AL && index >= 4 {
				rexUsed |= PrefixREX
				index -= 4
				base = SPB
			}
			inst.Args[narg] = base + index
			narg++

		case xArgMm, xArgMm1, xArgTR0dashTR7:
			inst.Args[narg] = baseReg[x] + Reg(regop&7)
			narg++

		case xArgCR0dashCR7:
			// AMD documents an extension that the LOCK prefix
			// can be used in place of a REX prefix in order to access
			// CR8 from 32-bit mode. The LOCK prefix is allowed in
			// all modes, provided the corresponding CPUID bit is set.
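			// (For example, with LOCK present a modrm reg field of 0 selects
			// CR8 rather than CR0 here: F0 0F 22 C0 in 32-bit mode yields a
			// CR8 destination. Illustrative byte sequence of our own choosing.)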
			if lockIndex >= 0 {
				inst.Prefix[lockIndex] |= PrefixImplicit
				regop += 8
			}
			inst.Args[narg] = CR0 + Reg(regop)
			narg++

		case xArgSreg:
			regop &= 7
			if regop >= 6 {
				inst.Op = 0
				break Decode
			}
			inst.Args[narg] = ES + Reg(regop)
			narg++

		case xArgRmf16, xArgRmf32, xArgRmf64:
			base := baseReg[x]
			index := Reg(modrm & 07)
			if rex&PrefixREXB != 0 {
				rexUsed |= PrefixREXB
				index += 8
			}
			inst.Args[narg] = base + index
			narg++

		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
			n := inst.Opcode >> uint(opshift+8) & 07
			base := baseReg[x]
			index := Reg(n)
			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
				rexUsed |= PrefixREXB
				index += 8
			}
			if rex != 0 && base == AL && index >= 4 {
				rexUsed |= PrefixREX
				index -= 4
				base = SPB
			}
			inst.Args[narg] = base + index
			narg++

		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
			xArgMmM32, xArgMmM64, xArgMm2M64,
			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128:
			if haveMem {
				inst.Args[narg] = mem
				inst.MemBytes = int(memBytes[decodeOp(x)])
			} else {
				base := baseReg[x]
				index := Reg(rm)
				switch decodeOp(x) {
				case xArgMmM32, xArgMmM64, xArgMm2M64:
					// There are only 8 MMX registers, so these ignore the REX.B extension of rm.
					index &= 7
				case xArgRM8:
					if rex != 0 && index >= 4 {
						rexUsed |= PrefixREX
						index -= 4
						base = SPB
					}
				}
				inst.Args[narg] = base + index
			}
			narg++

		case xArgMm2: // register only; TODO(rsc): Handle with modrm_regonly tag
			if haveMem {
				inst.Op = 0
				break Decode
			}
			inst.Args[narg] = baseReg[x] + Reg(rm&7)
			narg++

		case xArgXmm2: // register only; TODO(rsc): Handle with modrm_regonly tag
			if haveMem {
				inst.Op = 0
				break Decode
			}
			inst.Args[narg] = baseReg[x] + Reg(rm)
			narg++

		case xArgRel8:
			inst.Args[narg] = Rel(int8(immc))
			narg++

		case xArgRel16:
			inst.Args[narg] = Rel(int16(immc))
			narg++

		case xArgRel32:
			inst.Args[narg] = Rel(int32(immc))
			narg++
		}
	}

	if inst.Op == 0 {
		// Invalid instruction.
		if nprefix > 0 {
			return instPrefix(src[0], mode) // invalid instruction
		}
		return Inst{Len: pos}, ErrUnrecognized
	}

	// Matched! Hooray!

	// 90 decodes as XCHG EAX, EAX but is NOP.
	// 66 90 decodes as XCHG AX, AX and is NOP too.
	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
	// It's all too special to handle in the decoding tables, at least for now.
	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
			inst.Op = NOP
			if dataSizeIndex >= 0 {
				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
			}
			inst.Args[0] = nil
			inst.Args[1] = nil
		}
		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
			inst.Prefix[repIndex] |= PrefixImplicit
			inst.Op = PAUSE
			inst.Args[0] = nil
			inst.Args[1] = nil
		} else if gnuCompat {
			for i := nprefix - 1; i >= 0; i-- {
				if inst.Prefix[i]&0xFF == 0xF3 {
					inst.Prefix[i] |= PrefixImplicit
					inst.Op = PAUSE
					inst.Args[0] = nil
					inst.Args[1] = nil
					break
				}
			}
		}
	}

	// defaultSeg returns the default segment for an implicit
	// memory reference: the final override if present, or else DS.
	defaultSeg := func() Reg {
		if segIndex >= 0 {
			inst.Prefix[segIndex] |= PrefixImplicit
			return prefixToSegment(inst.Prefix[segIndex])
		}
		return DS
	}

	// Add implicit arguments not present in the tables.
	// Normally we shy away from making implicit arguments explicit,
	// following the Intel manuals, but adding the arguments seems
	// the best way to express the effect of the segment override prefixes.
	// TODO(rsc): Perhaps add these to the tables and
	// create bytecode instructions for them.
	usedAddrSize := false
	switch inst.Op {
	case INSB, INSW, INSD:
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
		inst.Args[1] = DX
		usedAddrSize = true

	case OUTSB, OUTSW, OUTSD:
		inst.Args[0] = DX
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
		usedAddrSize = true

	case MOVSB, MOVSW, MOVSD, MOVSQ:
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
		usedAddrSize = true

	case CMPSB, CMPSW, CMPSD, CMPSQ:
		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
		usedAddrSize = true

	case LODSB, LODSW, LODSD, LODSQ:
		switch inst.Op {
		case LODSB:
			inst.Args[0] = AL
		case LODSW:
			inst.Args[0] = AX
		case LODSD:
			inst.Args[0] = EAX
		case LODSQ:
			inst.Args[0] = RAX
		}
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
		usedAddrSize = true

	case STOSB, STOSW, STOSD, STOSQ:
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
		switch inst.Op {
		case STOSB:
			inst.Args[1] = AL
		case STOSW:
			inst.Args[1] = AX
		case STOSD:
			inst.Args[1] = EAX
		case STOSQ:
			inst.Args[1] = RAX
		}
		usedAddrSize = true

	case SCASB, SCASW, SCASD, SCASQ:
		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
		switch inst.Op {
		case SCASB:
			inst.Args[0] = AL
		case SCASW:
			inst.Args[0] = AX
		case SCASD:
			inst.Args[0] = EAX
		case SCASQ:
			inst.Args[0] = RAX
		}
		usedAddrSize = true

	case XLATB:
		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
		usedAddrSize = true
	}

	// If we used the address size annotation to construct the
	// argument list, mark that prefix as implicit: it doesn't need
	// to be shown when printing the instruction.
	if haveMem || usedAddrSize {
		if addrSizeIndex >= 0 {
			inst.Prefix[addrSizeIndex] |= PrefixImplicit
		}
	}

	// Similarly, if there's some memory operand, the segment
	// will be shown there and doesn't need to be shown as an
	// explicit prefix.
	if haveMem {
		if segIndex >= 0 {
			inst.Prefix[segIndex] |= PrefixImplicit
		}
	}

	// Branch predict prefixes are overloaded segment prefixes,
	// since segment prefixes don't make sense on conditional jumps.
	// Rewrite final instance to prediction prefix.
	// The set of instructions to which the prefixes apply (other than the
	// Jcc conditional jumps) is not 100% clear from the manuals, but
	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
	// so we'll follow along.
	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
	PredictLoop:
		for i := nprefix - 1; i >= 0; i-- {
			p := inst.Prefix[i]
			switch p & 0xFF {
			case PrefixCS:
				inst.Prefix[i] = PrefixPN
				break PredictLoop
			case PrefixDS:
				inst.Prefix[i] = PrefixPT
				break PredictLoop
			}
		}
	}

	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
	// A REPN applied to certain control transfers is a BND prefix to bound
	// the range of possible destinations. There's surprisingly little documentation
	// about this, so we just do what libopcodes and xed agree on.
	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
	// does not turn into a BND.
	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
		for i := nprefix - 1; i >= 0; i-- {
			p := inst.Prefix[i]
			if p&^PrefixIgnored == PrefixREPN {
				inst.Prefix[i] = PrefixBND
				break
			}
		}
	}

	// The LOCK prefix only applies to certain instructions, and then only
	// to instances of the instruction with a memory destination.
	// Other uses of LOCK are invalid and cause a processor exception,
	// in contrast to the "just ignore it" spirit applied to all other prefixes.
	// Mark invalid lock prefixes.
	hasLock := false
	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
		switch inst.Op {
		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
			if isMem(inst.Args[0]) {
				hasLock = true
				break
			}
			fallthrough
		default:
			inst.Prefix[lockIndex] |= PrefixInvalid
		}
	}

	// In certain cases, all of which require a memory destination,
	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
	// from the Intel Transactional Synchronization Extensions (TSX).
	//
	// The specific rules are:
	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
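	// For instance, under rule (1), F2 F0 01 08 in 64-bit mode ends up
	// reported as an XACQUIRE LOCK ADD with a [RAX] destination rather than
	// as a REPN-prefixed ADD. (Illustrative byte sequence of our own choosing.)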
	if isMem(inst.Args[0]) {
		if inst.Op == XCHG {
			hasLock = true
		}

		for i := len(inst.Prefix) - 1; i >= 0; i-- {
			p := inst.Prefix[i] &^ PrefixIgnored
			switch p {
			case PrefixREPN:
				if hasLock {
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
				}

			case PrefixREP:
				if hasLock {
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
				}

				if inst.Op == MOV {
					op := (inst.Opcode >> 24) &^ 1
					if op == 0x88 || op == 0xC6 {
						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
					}
				}
			}
		}
	}

	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
	if repIndex >= 0 {
		switch inst.Prefix[repIndex] {
		case PrefixREP, PrefixREPN:
			switch inst.Op {
			// According to the manuals, the REP/REPE prefix applies to all of these,
			// while the REPN applies only to some of them. However, both libopcodes
			// and xed show both prefixes explicitly for all instructions, so we do the same.
			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
			case INSB, INSW, INSD,
				MOVSB, MOVSW, MOVSD, MOVSQ,
				OUTSB, OUTSW, OUTSD,
				LODSB, LODSW, LODSD, LODSQ,
				CMPSB, CMPSW, CMPSD, CMPSQ,
				SCASB, SCASW, SCASD, SCASQ,
				STOSB, STOSW, STOSD, STOSQ:
				// ok
			default:
				inst.Prefix[repIndex] |= PrefixIgnored
			}
		}
	}

	// If REX was present, mark implicit if all the 1 bits were consumed.
	if rexIndex >= 0 {
		if rexUsed != 0 {
			rexUsed |= PrefixREX
		}
		if rex&^rexUsed == 0 {
			inst.Prefix[rexIndex] |= PrefixImplicit
		}
	}

	inst.DataSize = dataMode
	inst.AddrSize = addrMode
	inst.Mode = mode
	inst.Len = pos
	return inst, nil
}

var errInternal = errors.New("internal error")

// addr16 records the eight 16-bit addressing modes.
var addr16 = [8]Mem{
	{Base: BX, Scale: 1, Index: SI},
	{Base: BX, Scale: 1, Index: DI},
	{Base: BP, Scale: 1, Index: SI},
	{Base: BP, Scale: 1, Index: DI},
	{Base: SI},
	{Base: DI},
	{Base: BP},
	{Base: BX},
}

// baseRegForBits returns the base register for a given register size in bits.
func baseRegForBits(bits int) Reg {
	switch bits {
	case 8:
		return AL
	case 16:
		return AX
	case 32:
		return EAX
	case 64:
		return RAX
	}
	return 0
}

// baseReg records the base register for argument types that specify
// a range of registers indexed by op, regop, or rm.
var baseReg = [...]Reg{
	xArgDR0dashDR7: DR0,
	xArgMm1:        M0,
	xArgMm2:        M0,
	xArgMm2M64:     M0,
	xArgMm:         M0,
	xArgMmM32:      M0,
	xArgMmM64:      M0,
	xArgR16:        AX,
	xArgR16op:      AX,
	xArgR32:        EAX,
	xArgR32M16:     EAX,
	xArgR32M8:      EAX,
	xArgR32op:      EAX,
	xArgR64:        RAX,
	xArgR64M16:     RAX,
	xArgR64op:      RAX,
	xArgR8:         AL,
	xArgR8op:       AL,
	xArgRM16:       AX,
	xArgRM32:       EAX,
	xArgRM64:       RAX,
	xArgRM8:        AL,
	xArgRmf16:      AX,
	xArgRmf32:      EAX,
	xArgRmf64:      RAX,
	xArgSTi:        F0,
	xArgTR0dashTR7: TR0,
	xArgXmm1:       X0,
	xArgXmm2:       X0,
	xArgXmm2M128:   X0,
	xArgXmm2M16:    X0,
	xArgXmm2M32:    X0,
	xArgXmm2M64:    X0,
	xArgXmm:        X0,
	xArgXmmM128:    X0,
	xArgXmmM32:     X0,
	xArgXmmM64:     X0,
}

// prefixToSegment returns the segment register
// corresponding to a particular segment prefix.
func prefixToSegment(p Prefix) Reg {
	switch p &^ PrefixImplicit {
	case PrefixCS:
		return CS
	case PrefixDS:
		return DS
	case PrefixES:
		return ES
	case PrefixFS:
		return FS
	case PrefixGS:
		return GS
	case PrefixSS:
		return SS
	}
	return 0
}

// fixedArg records the fixed arguments corresponding to the given bytecodes.
var fixedArg = [...]Arg{
	xArg1:    Imm(1),
	xArg3:    Imm(3),
	xArgAL:   AL,
	xArgAX:   AX,
	xArgDX:   DX,
	xArgEAX:  EAX,
	xArgEDX:  EDX,
	xArgRAX:  RAX,
	xArgRDX:  RDX,
	xArgCL:   CL,
	xArgCS:   CS,
	xArgDS:   DS,
	xArgES:   ES,
	xArgFS:   FS,
	xArgGS:   GS,
	xArgSS:   SS,
	xArgST:   F0,
	xArgXMM0: X0,
}

// memBytes records the size of the memory pointed at
// by a memory argument of the given form.
var memBytes = [...]int8{
	xArgM128:       128 / 8,
	xArgM16:        16 / 8,
	xArgM16and16:   (16 + 16) / 8,
	xArgM16colon16: (16 + 16) / 8,
	xArgM16colon32: (16 + 32) / 8,
	xArgM16int:     16 / 8,
	xArgM2byte:     2,
	xArgM32:        32 / 8,
	xArgM32and32:   (32 + 32) / 8,
	xArgM32fp:      32 / 8,
	xArgM32int:     32 / 8,
	xArgM64:        64 / 8,
	xArgM64fp:      64 / 8,
	xArgM64int:     64 / 8,
	xArgMm2M64:     64 / 8,
	xArgMmM32:      32 / 8,
	xArgMmM64:      64 / 8,
	xArgMoffs16:    16 / 8,
	xArgMoffs32:    32 / 8,
	xArgMoffs64:    64 / 8,
	xArgMoffs8:     8 / 8,
	xArgR32M16:     16 / 8,
	xArgR32M8:      8 / 8,
	xArgR64M16:     16 / 8,
	xArgRM16:       16 / 8,
	xArgRM32:       32 / 8,
	xArgRM64:       64 / 8,
	xArgRM8:        8 / 8,
	xArgXmm2M128:   128 / 8,
	xArgXmm2M16:    16 / 8,
	xArgXmm2M32:    32 / 8,
	xArgXmm2M64:    64 / 8,
	xArgXmm:        128 / 8,
	xArgXmmM128:    128 / 8,
	xArgXmmM32:     32 / 8,
	xArgXmmM64:     64 / 8,
}

// isCondJmp records the conditional jumps.
var isCondJmp = [maxOp + 1]bool{
	JA:  true,
	JAE: true,
	JB:  true,
	JBE: true,
	JE:  true,
	JG:  true,
	JGE: true,
	JL:  true,
	JLE: true,
	JNE: true,
	JNO: true,
	JNP: true,
	JNS: true,
	JO:  true,
	JP:  true,
	JS:  true,
}

// isLoop records the loop operators.
var isLoop = [maxOp + 1]bool{
	LOOP:   true,
	LOOPE:  true,
	LOOPNE: true,
	JECXZ:  true,
	JRCXZ:  true,
}
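
// decodeAll is an illustrative sketch, not used by the decoder itself:
// it walks a byte stream by repeatedly calling Decode and advancing by
// Inst.Len, stopping at the first error. The helper's name and shape are
// our own; only the Decode call and the Inst.Len field come from this package.
func decodeAll(src []byte, mode int) ([]Inst, error) {
	var out []Inst
	for len(src) > 0 {
		inst, err := Decode(src, mode)
		if err != nil {
			return out, err // for example ErrTruncated or ErrUnrecognized
		}
		out = append(out, inst)
		src = src[inst.Len:] // Len counts the prefix, opcode, and operand bytes consumed
	}
	return out, nil
}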