Home | History | Annotate | Download | only in gen
      1 // Copyright 2017 syzkaller project authors. All rights reserved.
      2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
      3 
      4 // gen generates instruction tables (ifuzz/insns.go) from Intel XED tables.
      5 // Tables used to generate insns.go are checked in in all-enc-instructions.txt.
      6 package main
      7 
      8 import (
      9 	"bufio"
     10 	"fmt"
     11 	"os"
     12 	"reflect"
     13 	"strconv"
     14 	"strings"
     15 
     16 	"github.com/google/syzkaller/pkg/ifuzz"
     17 	"github.com/google/syzkaller/pkg/serializer"
     18 )
     19 
     20 // nolint: gocyclo
     21 func main() {
     22 	if len(os.Args) != 2 {
     23 		failf("usage: gen instructions.txt")
     24 	}
     25 	f, err := os.Open(os.Args[1])
     26 	if err != nil {
     27 		failf("failed to open input file: %v", err)
     28 	}
     29 	defer f.Close()
     30 
     31 	skipped := 0
     32 	saved := ""
     33 	var insns []*ifuzz.Insn
     34 	var insn, insn1 *ifuzz.Insn
     35 	s := bufio.NewScanner(f)
     36 	for i := 1; s.Scan(); i++ {
     37 		reportError := func(msg string, args ...interface{}) {
     38 			fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text())
     39 			failf(msg, args...)
     40 		}
     41 		line := s.Text()
     42 		if comment := strings.IndexByte(line, '#'); comment != -1 {
     43 			line = line[:comment]
     44 		}
     45 		line = strings.TrimSpace(line)
     46 		if line == "" {
     47 			continue
     48 		}
     49 		if line[len(line)-1] == '\\' {
     50 			saved += line[:len(line)-1]
     51 			continue
     52 		}
     53 		line = saved + line
     54 		saved = ""
     55 		if line == "{" {
     56 			insn = new(ifuzz.Insn)
     57 			continue
     58 		}
     59 		if line == "}" {
     60 			if insn1 != nil {
     61 				insns = append(insns, insn1)
     62 				insn1 = nil
     63 				insn = nil
     64 			}
     65 			continue
     66 		}
     67 		colon := strings.IndexByte(line, ':')
     68 		if colon == -1 {
     69 			reportError("no colon")
     70 		}
     71 		name := strings.TrimSpace(line[:colon])
     72 		if name == "" {
     73 			reportError("empty attribute name")
     74 		}
     75 		var vals []string
     76 		for _, v := range strings.Split(line[colon+1:], " ") {
     77 			v = strings.TrimSpace(v)
     78 			if v == "" {
     79 				continue
     80 			}
     81 			vals = append(vals, v)
     82 		}
     83 		switch name {
     84 		case "ICLASS":
     85 			if len(vals) != 1 {
     86 				reportError("ICLASS has more than one value")
     87 			}
     88 			insn.Name = vals[0]
     89 		case "CPL":
     90 			if len(vals) != 1 {
     91 				reportError("CPL has more than one value")
     92 			}
     93 			if vals[0] != "0" && vals[0] != "3" {
     94 				reportError("unknown CPL value: %v", vals[0])
     95 			}
     96 			insn.Priv = vals[0] == "0"
     97 		case "EXTENSION":
     98 			if len(vals) != 1 {
     99 				reportError("EXTENSION has more than one value")
    100 			}
    101 			insn.Extension = vals[0]
    102 			switch insn.Extension {
    103 			case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER":
    104 				insn.Mode = 1<<ifuzz.ModeLong64 | 1<<ifuzz.ModeProt32
    105 			}
    106 			insn.Avx2Gather = insn.Extension == "AVX2GATHER"
    107 		case "PATTERN":
    108 			if insn1 != nil {
    109 				insns = append(insns, insn1)
    110 			}
    111 			insn1 = new(ifuzz.Insn)
    112 			*insn1 = *insn
    113 			if err := parsePattern(insn1, vals); err != nil {
    114 				if _, ok := err.(errSkip); !ok {
    115 					reportError(err.Error())
    116 				}
    117 				if err.Error() != "" {
    118 					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
    119 				}
    120 				skipped++
    121 				insn1 = nil
    122 			}
    123 		case "OPERANDS":
    124 			if insn1 == nil {
    125 				break
    126 			}
    127 			if err := parseOperands(insn1, vals); err != nil {
    128 				if _, ok := err.(errSkip); !ok {
    129 					reportError(err.Error())
    130 				}
    131 				if err.Error() != "" {
    132 					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
    133 				}
    134 				skipped++
    135 				insn1 = nil
    136 			}
    137 		}
    138 	}
    139 
    140 	var deduped []*ifuzz.Insn
    141 nextInsn:
    142 	for _, insn := range insns {
    143 		if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" {
    144 			skipped++
    145 			continue
    146 		}
    147 		mod0 := insn.Mod
    148 		for j := len(deduped) - 1; j >= 0; j-- {
    149 			insn1 := deduped[j]
    150 			if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 {
    151 				insn.Mod = insn1.Mod
    152 			}
    153 			if reflect.DeepEqual(insn, insn1) {
    154 				if insn.Mod != mod0 {
    155 					insn1.Mod = -1
    156 				}
    157 				continue nextInsn
    158 			}
    159 			insn.Mod = mod0
    160 		}
    161 		deduped = append(deduped, insn)
    162 	}
    163 	fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped))
    164 	insns = deduped
    165 
    166 	fmt.Printf("// AUTOGENERATED FILE\n\n")
    167 	fmt.Printf("package ifuzz\n\n")
    168 	fmt.Printf("import . \"github.com/google/syzkaller/pkg/ifuzz\"\n\n")
    169 	fmt.Printf("func init() { Insns = insns }\n\n")
    170 	fmt.Printf("var insns = ")
    171 	serializer.Write(os.Stdout, insns)
    172 
    173 	fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped)
    174 }
    175 
    176 type errSkip string
    177 
    178 func (err errSkip) Error() string {
    179 	return string(err)
    180 }
    181 
    182 // nolint: gocyclo
    183 func parsePattern(insn *ifuzz.Insn, vals []string) error {
    184 	if insn.Opcode != nil {
    185 		return fmt.Errorf("PATTERN is already parsed for the instruction")
    186 	}
    187 	// As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix.
    188 	if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" ||
    189 		insn.Name == "NOP8" || insn.Name == "NOP9" {
    190 		return errSkip("")
    191 	}
    192 	if insn.Mode == 0 {
    193 		insn.Mode = 1<<ifuzz.ModeLast - 1
    194 	}
    195 	insn.Mod = -100
    196 	insn.Reg = -100
    197 	insn.Rm = -100
    198 	insn.VexP = -1
    199 	for _, v := range vals {
    200 		switch {
    201 		case strings.HasPrefix(v, "0x"):
    202 			op, err := strconv.ParseUint(v, 0, 8)
    203 			if err != nil {
    204 				return fmt.Errorf("failed to parse hex pattern: %v", v)
    205 			}
    206 			if !insn.Modrm {
    207 				insn.Opcode = append(insn.Opcode, byte(op))
    208 			} else {
    209 				insn.Suffix = append(insn.Suffix, byte(op))
    210 			}
    211 		case strings.HasPrefix(v, "0b"):
    212 			if len(v) != 8 || v[6] != '_' {
    213 				return fmt.Errorf("failed to parse bin pattern: %v", v)
    214 			}
    215 			var op byte
    216 			if v[2] == '1' {
    217 				op |= 1 << 7
    218 			}
    219 			if v[3] == '1' {
    220 				op |= 1 << 6
    221 			}
    222 			if v[4] == '1' {
    223 				op |= 1 << 5
    224 			}
    225 			if v[5] == '1' {
    226 				op |= 1 << 4
    227 			}
    228 			if v[7] == '1' {
    229 				op |= 1 << 3
    230 			}
    231 			insn.Opcode = append(insn.Opcode, op)
    232 		case strings.HasPrefix(v, "MOD["):
    233 			insn.Modrm = true
    234 			vv, err := parseModrm(v[3:])
    235 			if err != nil {
    236 				return fmt.Errorf("failed to parse %v: %v", v, err)
    237 			}
    238 			insn.Mod = vv
    239 		case strings.HasPrefix(v, "REG["):
    240 			insn.Modrm = true
    241 			vv, err := parseModrm(v[3:])
    242 			if err != nil {
    243 				return fmt.Errorf("failed to parse %v: %v", v, err)
    244 			}
    245 			insn.Reg = vv
    246 		case strings.HasPrefix(v, "RM["):
    247 			insn.Modrm = true
    248 			vv, err := parseModrm(v[2:])
    249 			if err != nil {
    250 				return fmt.Errorf("failed to parse %v: %v", v, err)
    251 			}
    252 			insn.Rm = vv
    253 		case v == "RM=4":
    254 			insn.Rm = 4
    255 		case strings.HasPrefix(v, "SRM["):
    256 			vv, err := parseModrm(v[3:])
    257 			if err != nil {
    258 				return fmt.Errorf("failed to parse %v: %v", v, err)
    259 			}
    260 			insn.Rm = vv
    261 			insn.Srm = true
    262 		case v == "SRM=0", v == "SRM!=0":
    263 		case v == "MOD!=3":
    264 			if !insn.Modrm || insn.Mod != -1 {
    265 				return fmt.Errorf("MOD!=3 without MOD")
    266 			}
    267 			insn.Mod = -3
    268 		case v == "MOD=3":
    269 			// Most other instructions contain "MOD[0b11] MOD=3",
    270 			// but BNDCL contains "MOD[mm] MOD=3"
    271 			insn.Mod = 3
    272 		case v == "MOD=0":
    273 			insn.Mod = 0
    274 		case v == "MOD=1":
    275 			insn.Mod = 1
    276 		case v == "MOD=2":
    277 			insn.Mod = 2
    278 		case v == "lock_prefix":
    279 			insn.Prefix = append(insn.Prefix, 0xF0)
    280 
    281 		// Immediates.
    282 		case v == "UIMM8()", v == "SIMM8()":
    283 			addImm(insn, 1)
    284 		case v == "UIMM16()":
    285 			addImm(insn, 2)
    286 		case v == "UIMM32()":
    287 			addImm(insn, 4)
    288 		case v == "SIMMz()":
    289 			addImm(insn, -1)
    290 		case v == "UIMMv()":
    291 			addImm(insn, -3)
    292 		case v == "UIMM8_1()":
    293 			addImm(insn, 1)
    294 		case v == "BRDISP8()":
    295 			addImm(insn, 1)
    296 		case v == "BRDISP32()":
    297 			addImm(insn, 4)
    298 		case v == "BRDISPz()":
    299 			addImm(insn, -1)
    300 		case v == "MEMDISPv()":
    301 			addImm(insn, -2)
    302 
    303 		// VOP/VEX
    304 		case v == "XOPV":
    305 			insn.Vex = 0x8f
    306 			insn.Mode &^= 1 << ifuzz.ModeReal16
    307 		case v == "EVV":
    308 			insn.Vex = 0xc4
    309 		case v == "VV1":
    310 			insn.Vex = 0xc4
    311 		case v == "VMAP0":
    312 			insn.VexMap = 0
    313 		case v == "V0F":
    314 			insn.VexMap = 1
    315 		case v == "V0F38":
    316 			insn.VexMap = 2
    317 		case v == "V0F3A":
    318 			insn.VexMap = 3
    319 		case v == "XMAP8":
    320 			insn.VexMap = 8
    321 		case v == "XMAP9":
    322 			insn.VexMap = 9
    323 		case v == "XMAPA":
    324 			insn.VexMap = 10
    325 		case v == "VNP":
    326 			insn.VexP = 0
    327 		case v == "V66":
    328 			insn.VexP = 1
    329 		case v == "VF2":
    330 			insn.VexP = 3
    331 		case v == "VF3":
    332 			insn.VexP = 2
    333 		case v == "VL128", v == "VL=0":
    334 			insn.VexL = -1
    335 		case v == "VL256", v == "VL=1":
    336 			insn.VexL = 1
    337 		case v == "NOVSR":
    338 			insn.VexNoR = true
    339 		case v == "NOEVSR":
    340 			insn.VexNoR = true
    341 			// VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0
    342 		case v == "SE_IMM8()":
    343 			addImm(insn, 1)
    344 
    345 		// Modes
    346 		case v == "mode64":
    347 			insn.Mode &= 1 << ifuzz.ModeLong64
    348 		case v == "not64":
    349 			insn.Mode &^= 1 << ifuzz.ModeLong64
    350 		case v == "mode32":
    351 			insn.Mode &= 1 << ifuzz.ModeProt32
    352 		case v == "mode16":
    353 			insn.Mode &= 1<<ifuzz.ModeProt16 | 1<<ifuzz.ModeReal16
    354 		case v == "eamode64",
    355 			v == "eamode32",
    356 			v == "eamode16",
    357 			v == "eanot16":
    358 
    359 		case v == "no_refining_prefix":
    360 			insn.NoRepPrefix = true
    361 			insn.No66Prefix = true
    362 		case v == "no66_prefix", v == "eosz32", v == "eosz64":
    363 			insn.No66Prefix = true
    364 		case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2":
    365 			insn.Prefix = append(insn.Prefix, 0xF2)
    366 			insn.NoRepPrefix = true
    367 		case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3":
    368 			insn.Prefix = append(insn.Prefix, 0xF3)
    369 			insn.NoRepPrefix = true
    370 		case v == "norep", v == "not_refining", v == "REP=0":
    371 			insn.NoRepPrefix = true
    372 		case v == "osz_refining_prefix":
    373 			insn.Prefix = append(insn.Prefix, 0x66)
    374 			insn.NoRepPrefix = true
    375 		case v == "rexw_prefix", v == "W1":
    376 			insn.Rexw = 1
    377 		case v == "norexw_prefix", v == "W0":
    378 			insn.Rexw = -1
    379 		case v == "MPXMODE=1",
    380 			v == "MPXMODE=0",
    381 			v == "TZCNT=1",
    382 			v == "TZCNT=0",
    383 			v == "LZCNT=1",
    384 			v == "LZCNT=0",
    385 			v == "CR_WIDTH()",
    386 			v == "DF64()",
    387 			v == "IMMUNE_REXW()",
    388 			v == "FORCE64()",
    389 			v == "EOSZ=1",
    390 			v == "EOSZ!=1",
    391 			v == "EOSZ=2",
    392 			v == "EOSZ!=2",
    393 			v == "EOSZ=3",
    394 			v == "EOSZ!=3",
    395 			v == "BRANCH_HINT()",
    396 			v == "P4=1",
    397 			v == "P4=0",
    398 			v == "rexb_prefix",
    399 			v == "norexb_prefix",
    400 			v == "IMMUNE66()",
    401 			v == "REFINING66()",
    402 			v == "IGNORE66()",
    403 			v == "IMMUNE66_LOOP64()",
    404 			v == "OVERRIDE_SEG0()",
    405 			v == "OVERRIDE_SEG1()",
    406 			v == "REMOVE_SEGMENT()",
    407 			v == "ONE()",
    408 			v == "nolock_prefix",
    409 			v == "MODRM()",
    410 			v == "VMODRM_XMM()",
    411 			v == "VMODRM_YMM()",
    412 			v == "BCRC=0",
    413 			v == "BCRC=1",
    414 			v == "ESIZE_8_BITS()",
    415 			v == "ESIZE_16_BITS()",
    416 			v == "ESIZE_32_BITS()",
    417 			v == "ESIZE_64_BITS()",
    418 			v == "NELEM_GPR_WRITER_STORE()",
    419 			v == "NELEM_GPR_WRITER_STORE_BYTE()",
    420 			v == "NELEM_GPR_WRITER_STORE_WORD()",
    421 			v == "NELEM_GPR_WRITER_LDOP_Q()",
    422 			v == "NELEM_GPR_WRITER_LDOP_D()",
    423 			v == "NELEM_GPR_READER()",
    424 			v == "NELEM_GPR_READER_BYTE()",
    425 			v == "NELEM_GPR_READER_WORD()",
    426 			v == "NELEM_GSCAT()",
    427 			v == "NELEM_HALF()",
    428 			v == "NELEM_FULL()",
    429 			v == "NELEM_FULLMEM()",
    430 			v == "NELEM_QUARTERMEM()",
    431 			v == "NELEM_EIGHTHMEM()",
    432 			v == "NELEM_HALFMEM()",
    433 			v == "NELEM_QUARTERMEM()",
    434 			v == "NELEM_MEM128()",
    435 			v == "NELEM_SCALAR()",
    436 			v == "NELEM_TUPLE1()",
    437 			v == "NELEM_TUPLE2()",
    438 			v == "NELEM_TUPLE4()",
    439 			v == "NELEM_TUPLE8()",
    440 			v == "NELEM_TUPLE1_4X()",
    441 			v == "NELEM_TUPLE1_BYTE()",
    442 			v == "NELEM_TUPLE1_WORD()",
    443 			v == "NELEM_MOVDDUP()",
    444 			v == "UISA_VMODRM_XMM()",
    445 			v == "UISA_VMODRM_YMM()",
    446 			v == "UISA_VMODRM_ZMM()",
    447 			v == "MASK=0",
    448 			v == "FIX_ROUND_LEN128()",
    449 			v == "FIX_ROUND_LEN512()",
    450 			v == "AVX512_ROUND()",
    451 			v == "ZEROING=0",
    452 			v == "SAE()",
    453 			v == "VL512", // VL=2
    454 			v == "not_refining_f3",
    455 			strings.HasPrefix(v, "MODEP5="):
    456 		default:
    457 			return errSkip(fmt.Sprintf("unknown pattern %v", v))
    458 		}
    459 	}
    460 	if insn.Modrm {
    461 		switch insn.Mod {
    462 		case -3, -1, 0, 1, 2, 3:
    463 		default:
    464 			return fmt.Errorf("bad MOD value: %v", insn.Mod)
    465 		}
    466 		if insn.Reg < -1 || insn.Reg > 7 {
    467 			return fmt.Errorf("bad REG value: %v", insn.Mod)
    468 		}
    469 		if insn.Rm < -1 || insn.Rm > 7 {
    470 			return fmt.Errorf("bad RM value: %v", insn.Mod)
    471 		}
    472 	}
    473 	if insn.Imm != 0 && len(insn.Suffix) != 0 {
    474 		return fmt.Errorf("both immediate and suffix opcode")
    475 	}
    476 	if insn.Mode == 0 {
    477 		return errSkip("no modes for instruction")
    478 	}
    479 	return nil
    480 }
    481 
    482 func parseOperands(insn *ifuzz.Insn, vals []string) error {
    483 	for _, v := range vals {
    484 		switch v {
    485 		case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w":
    486 			if insn.Reg != -1 {
    487 				return fmt.Errorf("REG=SEG() operand, but fixed reg")
    488 			}
    489 			insn.Reg = -6
    490 		case "REG0=CR_R():w", "REG1=CR_R():r":
    491 			if insn.Reg != -1 {
    492 				return fmt.Errorf("REG=CR_R() operand, but fixed reg")
    493 			}
    494 			insn.Reg = -8
    495 			insn.NoSibDisp = true
    496 		case "REG0=DR_R():w", "REG1=DR_R():r":
    497 			insn.NoSibDisp = true
    498 		case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int":
    499 			insn.Mem16 = true
    500 		case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int":
    501 			insn.Mem32 = true
    502 		}
    503 	}
    504 	return nil
    505 }
    506 
    507 func parseModrm(v string) (int8, error) {
    508 	if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' {
    509 		return 0, fmt.Errorf("malformed")
    510 	}
    511 	if v == "[mm]" || v == "[rrr]" || v == "[nnn]" {
    512 		return -1, nil
    513 	}
    514 	if !strings.HasPrefix(v, "[0b") {
    515 		return 0, fmt.Errorf("malformed")
    516 	}
    517 	var vv int8
    518 	for i := 3; i < len(v)-1; i++ {
    519 		if v[i] != '0' && v[i] != '1' {
    520 			return 0, fmt.Errorf("malformed")
    521 		}
    522 		vv *= 2
    523 		if v[i] == '1' {
    524 			vv++
    525 		}
    526 	}
    527 	return vv, nil
    528 }
    529 
    530 func addImm(insn *ifuzz.Insn, imm int8) {
    531 	if insn.Imm == 0 {
    532 		insn.Imm = imm
    533 		return
    534 	}
    535 	if insn.Imm2 == 0 {
    536 		insn.Imm2 = imm
    537 		return
    538 	}
    539 	panic("too many immediates")
    540 }
    541 
    542 func failf(msg string, args ...interface{}) {
    543 	fmt.Fprintf(os.Stderr, msg+"\n", args...)
    544 	os.Exit(1)
    545 }
    546