Home | History | Annotate | Download | only in armasm
      1 // Copyright 2014 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Support for testing against external disassembler program.
      6 // Copied and simplified from ../../x86/x86asm/ext_test.go.
      7 
      8 package armasm
      9 
     10 import (
     11 	"bufio"
     12 	"bytes"
     13 	"encoding/hex"
     14 	"flag"
     15 	"fmt"
     16 	"io/ioutil"
     17 	"log"
     18 	"math/rand"
     19 	"os"
     20 	"os/exec"
     21 	"regexp"
     22 	"runtime"
     23 	"strings"
     24 	"testing"
     25 	"time"
     26 )
     27 
// Command-line flags and switches that control the external-disassembler
// tests in this file.
var (
	// printTests makes disasm print every input that increased coverage.
	printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths")
	// dumpTest makes testExtDis print every encoding together with the
	// external disassembler's text and length for it.
	dumpTest = flag.Bool("dump", false, "dump all encodings")
	// mismatch makes testExtDis record allowed mismatches instead of
	// silently skipping them.
	mismatch = flag.Bool("mismatch", false, "log allowed mismatches")
	// longTest selects the largest case count in condCases.
	longTest = flag.Bool("long", false, "long test")
	// keep preserves the generated input file and logs the command run.
	keep = flag.Bool("keep", false, "keep object files around")
	// debug, when true, makes writeInst print each encoding as it is written.
	debug = false
)
     36 
     37 // An ExtInst represents a single decoded instruction parsed
     38 // from an external disassembler's output.
     39 type ExtInst struct {
     40 	addr uint32
     41 	enc  [4]byte
     42 	nenc int
     43 	text string
     44 }
     45 
     46 func (r ExtInst) String() string {
     47 	return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
     48 }
     49 
// An ExtDis is a connection between an external disassembler and a test.
type ExtDis struct {
	Arch     Mode         // architecture mode passed to testExtDis
	Dec      chan ExtInst // parsed instructions, sent by the extdis function and received by testExtDis
	File     *os.File     // input file produced by writeInst
	Size     int          // number of bytes of instruction data written to File
	KeepFile bool         // NOTE(review): not referenced anywhere in this file; the -keep flag is used instead
	Cmd      *exec.Cmd    // command started by Run; Wait waits on it
}
     59 
     60 // Run runs the given command - the external disassembler - and returns
     61 // a buffered reader of its standard output.
     62 func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
     63 	if *keep {
     64 		log.Printf("%s\n", strings.Join(cmd, " "))
     65 	}
     66 	ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
     67 	out, err := ext.Cmd.StdoutPipe()
     68 	if err != nil {
     69 		return nil, fmt.Errorf("stdoutpipe: %v", err)
     70 	}
     71 	if err := ext.Cmd.Start(); err != nil {
     72 		return nil, fmt.Errorf("exec: %v", err)
     73 	}
     74 
     75 	b := bufio.NewReaderSize(out, 1<<20)
     76 	return b, nil
     77 }
     78 
     79 // Wait waits for the command started with Run to exit.
     80 func (ext *ExtDis) Wait() error {
     81 	return ext.Cmd.Wait()
     82 }
     83 
     84 // testExtDis tests a set of byte sequences against an external disassembler.
     85 // The disassembler is expected to produce the given syntax and be run
     86 // in the given architecture mode (16, 32, or 64-bit).
     87 // The extdis function must start the external disassembler
     88 // and then parse its output, sending the parsed instructions on ext.Dec.
     89 // The generate function calls its argument f once for each byte sequence
     90 // to be tested. The generate function itself will be called twice, and it must
     91 // make the same sequence of calls to f each time.
     92 // When a disassembly does not match the internal decoding,
     93 // allowedMismatch determines whether this mismatch should be
     94 // allowed, or else considered an error.
     95 func testExtDis(
     96 	t *testing.T,
     97 	syntax string,
     98 	arch Mode,
     99 	extdis func(ext *ExtDis) error,
    100 	generate func(f func([]byte)),
    101 	allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool,
    102 ) {
    103 	start := time.Now()
    104 	ext := &ExtDis{
    105 		Dec:  make(chan ExtInst),
    106 		Arch: arch,
    107 	}
    108 	errc := make(chan error)
    109 
    110 	// First pass: write instructions to input file for external disassembler.
    111 	file, f, size, err := writeInst(generate)
    112 	if err != nil {
    113 		t.Fatal(err)
    114 	}
    115 	ext.Size = size
    116 	ext.File = f
    117 	defer func() {
    118 		f.Close()
    119 		if !*keep {
    120 			os.Remove(file)
    121 		}
    122 	}()
    123 
    124 	// Second pass: compare disassembly against our decodings.
    125 	var (
    126 		totalTests  = 0
    127 		totalSkips  = 0
    128 		totalErrors = 0
    129 
    130 		errors = make([]string, 0, 100) // sampled errors, at most cap
    131 	)
    132 	go func() {
    133 		errc <- extdis(ext)
    134 	}()
    135 	generate(func(enc []byte) {
    136 		dec, ok := <-ext.Dec
    137 		if !ok {
    138 			t.Errorf("decoding stream ended early")
    139 			return
    140 		}
    141 		inst, text := disasm(syntax, arch, pad(enc))
    142 		totalTests++
    143 		if *dumpTest {
    144 			fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
    145 		}
    146 		if text != dec.text || inst.Len != dec.nenc {
    147 			suffix := ""
    148 			if allowedMismatch(text, size, &inst, dec) {
    149 				totalSkips++
    150 				if !*mismatch {
    151 					return
    152 				}
    153 				suffix += " (allowed mismatch)"
    154 			}
    155 			totalErrors++
    156 			if len(errors) >= cap(errors) {
    157 				j := rand.Intn(totalErrors)
    158 				if j >= cap(errors) {
    159 					return
    160 				}
    161 				errors = append(errors[:j], errors[j+1:]...)
    162 			}
    163 			errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix))
    164 		}
    165 	})
    166 
    167 	if *mismatch {
    168 		totalErrors -= totalSkips
    169 	}
    170 
    171 	for _, b := range errors {
    172 		t.Log(b)
    173 	}
    174 
    175 	if totalErrors > 0 {
    176 		t.Fail()
    177 	}
    178 	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
    179 
    180 	if err := <-errc; err != nil {
    181 		t.Fatalf("external disassembler: %v", err)
    182 	}
    183 
    184 }
    185 
// start is the file offset at which writeInst begins writing encodings.
const start = 0x8000 // start address of text
    187 
    188 // writeInst writes the generated byte sequences to a new file
    189 // starting at offset start. That file is intended to be the input to
    190 // the external disassembler.
    191 func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
    192 	f, err = ioutil.TempFile("", "armasm")
    193 	if err != nil {
    194 		return
    195 	}
    196 
    197 	file = f.Name()
    198 
    199 	f.Seek(start, 0)
    200 	w := bufio.NewWriter(f)
    201 	defer w.Flush()
    202 	size = 0
    203 	generate(func(x []byte) {
    204 		if len(x) > 4 {
    205 			x = x[:4]
    206 		}
    207 		if debug {
    208 			fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
    209 		}
    210 		w.Write(x)
    211 		w.Write(zeros[len(x):])
    212 		size += len(zeros)
    213 	})
    214 	return file, f, size, nil
    215 }
    216 
    217 var zeros = []byte{0, 0, 0, 0}
    218 
    219 // pad pads the code sequence with pops.
    220 func pad(enc []byte) []byte {
    221 	if len(enc) < 4 {
    222 		enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...)
    223 	}
    224 	return enc
    225 }
    226 
    227 // disasm returns the decoded instruction and text
    228 // for the given source bytes, using the given syntax and mode.
    229 func disasm(syntax string, mode Mode, src []byte) (inst Inst, text string) {
    230 	// If printTests is set, we record the coverage value
    231 	// before and after, and we write out the inputs for which
    232 	// coverage went up, in the format expected in testdata/decode.text.
    233 	// This produces a fairly small set of test cases that exercise nearly
    234 	// all the code.
    235 	var cover float64
    236 	if *printTests {
    237 		cover -= coverage()
    238 	}
    239 
    240 	inst, err := Decode(src, mode)
    241 	if err != nil {
    242 		text = "error: " + err.Error()
    243 	} else {
    244 		text = inst.String()
    245 		switch syntax {
    246 		//case "arm":
    247 		//	text = ARMSyntax(inst)
    248 		case "gnu":
    249 			text = GNUSyntax(inst)
    250 		//case "plan9": // [sic]
    251 		//	text = GoSyntax(inst, 0, nil)
    252 		default:
    253 			text = "error: unknown syntax " + syntax
    254 		}
    255 	}
    256 
    257 	if *printTests {
    258 		cover += coverage()
    259 		if cover > 0 {
    260 			max := len(src)
    261 			if max > 4 && inst.Len <= 4 {
    262 				max = 4
    263 			}
    264 			fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], mode, syntax, text)
    265 		}
    266 	}
    267 
    268 	return
    269 }
    270 
    271 // coverage returns a floating point number denoting the
    272 // test coverage until now. The number increases when new code paths are exercised,
    273 // both in the Go program and in the decoder byte code.
    274 func coverage() float64 {
    275 	/*
    276 		testing.Coverage is not in the main distribution.
    277 		The implementation, which must go in package testing, is:
    278 
    279 		// Coverage reports the current code coverage as a fraction in the range [0, 1].
    280 		func Coverage() float64 {
    281 			var n, d int64
    282 			for _, counters := range cover.Counters {
    283 				for _, c := range counters {
    284 					if c > 0 {
    285 						n++
    286 					}
    287 					d++
    288 				}
    289 			}
    290 			if d == 0 {
    291 				return 0
    292 			}
    293 			return float64(n) / float64(d)
    294 		}
    295 	*/
    296 
    297 	var f float64
    298 	f += testing.Coverage()
    299 	f += decodeCoverage()
    300 	return f
    301 }
    302 
    303 func decodeCoverage() float64 {
    304 	n := 0
    305 	for _, t := range decoderCover {
    306 		if t {
    307 			n++
    308 		}
    309 	}
    310 	return float64(1+n) / float64(1+len(decoderCover))
    311 }
    312 
    313 // Helpers for writing disassembler output parsers.
    314 
    315 // hasPrefix reports whether any of the space-separated words in the text s
    316 // begins with any of the given prefixes.
    317 func hasPrefix(s string, prefixes ...string) bool {
    318 	for _, prefix := range prefixes {
    319 		for s := s; s != ""; {
    320 			if strings.HasPrefix(s, prefix) {
    321 				return true
    322 			}
    323 			i := strings.Index(s, " ")
    324 			if i < 0 {
    325 				break
    326 			}
    327 			s = s[i+1:]
    328 		}
    329 	}
    330 	return false
    331 }
    332 
    333 // contains reports whether the text s contains any of the given substrings.
    334 func contains(s string, substrings ...string) bool {
    335 	for _, sub := range substrings {
    336 		if strings.Contains(s, sub) {
    337 			return true
    338 		}
    339 	}
    340 	return false
    341 }
    342 
    343 // isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
    344 func isHex(b byte) bool { return b == '0' || unhex[b] > 0 }
    345 
    346 // parseHex parses the hexadecimal byte dump in hex,
    347 // appending the parsed bytes to raw and returning the updated slice.
    348 // The returned bool signals whether any invalid hex was found.
    349 // Spaces and tabs between bytes are okay but any other non-hex is not.
    350 func parseHex(hex []byte, raw []byte) ([]byte, bool) {
    351 	hex = trimSpace(hex)
    352 	for j := 0; j < len(hex); {
    353 		for hex[j] == ' ' || hex[j] == '\t' {
    354 			j++
    355 		}
    356 		if j >= len(hex) {
    357 			break
    358 		}
    359 		if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
    360 			return nil, false
    361 		}
    362 		raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]])
    363 		j += 2
    364 	}
    365 	return raw, true
    366 }
    367 
// unhex maps an ASCII hexadecimal digit to its numeric value.
// Every byte that is not a hex digit maps to 0, which is also the value
// of '0'; isHex accounts for that ambiguity.
var unhex = [256]byte{
	'0': 0,
	'1': 1,
	'2': 2,
	'3': 3,
	'4': 4,
	'5': 5,
	'6': 6,
	'7': 7,
	'8': 8,
	'9': 9,
	'A': 10,
	'B': 11,
	'C': 12,
	'D': 13,
	'E': 14,
	'F': 15,
	'a': 10,
	'b': 11,
	'c': 12,
	'd': 13,
	'e': 14,
	'f': 15,
}
    392 
    393 // index is like bytes.Index(s, []byte(t)) but avoids the allocation.
    394 func index(s []byte, t string) int {
    395 	i := 0
    396 	for {
    397 		j := bytes.IndexByte(s[i:], t[0])
    398 		if j < 0 {
    399 			return -1
    400 		}
    401 		i = i + j
    402 		if i+len(t) > len(s) {
    403 			return -1
    404 		}
    405 		for k := 1; k < len(t); k++ {
    406 			if s[i+k] != t[k] {
    407 				goto nomatch
    408 			}
    409 		}
    410 		return i
    411 	nomatch:
    412 		i++
    413 	}
    414 }
    415 
    416 // fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
    417 // If s must be rewritten, it is rewritten in place.
    418 func fixSpace(s []byte) []byte {
    419 	s = trimSpace(s)
    420 	for i := 0; i < len(s); i++ {
    421 		if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
    422 			goto Fix
    423 		}
    424 	}
    425 	return s
    426 
    427 Fix:
    428 	b := s
    429 	w := 0
    430 	for i := 0; i < len(s); i++ {
    431 		c := s[i]
    432 		if c == '\t' || c == '\n' {
    433 			c = ' '
    434 		}
    435 		if c == ' ' && w > 0 && b[w-1] == ' ' {
    436 			continue
    437 		}
    438 		b[w] = c
    439 		w++
    440 	}
    441 	if w > 0 && b[w-1] == ' ' {
    442 		w--
    443 	}
    444 	return b[:w]
    445 }
    446 
    447 // trimSpace trims leading and trailing space from s, returning a subslice of s.
    448 func trimSpace(s []byte) []byte {
    449 	j := len(s)
    450 	for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') {
    451 		j--
    452 	}
    453 	i := 0
    454 	for i < j && (s[i] == ' ' || s[i] == '\t') {
    455 		i++
    456 	}
    457 	return s[i:j]
    458 }
    459 
// pcrel matches instructions using relative addressing mode.
// It accepts text that ends in a b or bl mnemonic (optionally followed by
// x and by one of the listed condition codes), a space, and a hexadecimal
// target written as 0x followed by lower-case hex digits.
// Submatch 1 is everything before the target and submatch 2 is the
// target's hex digits without the 0x prefix.
var (
	pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)?) 0x([0-9a-f]+)$`)
)
    464 
    465 // Generators.
    466 //
    467 // The test cases are described as functions that invoke a callback repeatedly,
    468 // with a new input sequence each time. These helpers make writing those
    469 // a little easier.
    470 
    471 // condCases generates conditional instructions.
    472 func condCases(t *testing.T) func(func([]byte)) {
    473 	return func(try func([]byte)) {
    474 		// All the strides are relatively prime to 2 and therefore to 2,
    475 		// so we will not repeat any instructions until we have tried all 2.
    476 		// Using a stride other than 1 is meant to visit the instructions in a
    477 		// pseudorandom order, which gives better variety in the set of
    478 		// test cases chosen by -printtests.
    479 		stride := uint32(10007)
    480 		n := 1 << 28 / 7
    481 		if testing.Short() {
    482 			stride = 100003
    483 			n = 1 << 28 / 1001
    484 		} else if *longTest {
    485 			stride = 200000033
    486 			n = 1 << 28
    487 		}
    488 		x := uint32(0)
    489 		for i := 0; i < n; i++ {
    490 			enc := (x%15)<<28 | x&(1<<28-1)
    491 			try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)})
    492 			x += stride
    493 		}
    494 	}
    495 }
    496 
    497 // uncondCases generates unconditional instructions.
    498 func uncondCases(t *testing.T) func(func([]byte)) {
    499 	return func(try func([]byte)) {
    500 		condCases(t)(func(enc []byte) {
    501 			enc[3] |= 0xF0
    502 			try(enc)
    503 		})
    504 	}
    505 }
    506 
    507 func countBits(x uint32) int {
    508 	n := 0
    509 	for ; x != 0; x >>= 1 {
    510 		n += int(x & 1)
    511 	}
    512 	return n
    513 }
    514 
    515 func expandBits(x, m uint32) uint32 {
    516 	var out uint32
    517 	for i := uint(0); i < 32; i++ {
    518 		out >>= 1
    519 		if m&1 != 0 {
    520 			out |= (x & 1) << 31
    521 			x >>= 1
    522 		}
    523 		m >>= 1
    524 	}
    525 	return out
    526 }
    527 
    528 func tryCondMask(mask, val uint32, try func([]byte)) {
    529 	n := countBits(^mask)
    530 	bits := uint32(0)
    531 	for i := 0; i < 1<<uint(n); i++ {
    532 		bits += 848251 // arbitrary prime
    533 		x := val | expandBits(bits, ^mask) | uint32(i)%15<<28
    534 		try([]byte{byte(x), byte(x >> 8), byte(x >> 16), byte(x >> 24)})
    535 	}
    536 }
    537 
    538 // vfpCases generates VFP instructions.
    539 func vfpCases(t *testing.T) func(func([]byte)) {
    540 	const (
    541 		vfpmask uint32 = 0xFF00FE10
    542 		vfp     uint32 = 0x0E009A00
    543 	)
    544 	return func(try func([]byte)) {
    545 		tryCondMask(0xff00fe10, 0x0e009a00, try) // standard VFP instruction space
    546 		tryCondMask(0xffc00f7f, 0x0e000b10, try) // VFP MOV core reg to/from float64 half
    547 		tryCondMask(0xffe00f7f, 0x0e000a10, try) // VFP MOV core reg to/from float32
    548 		tryCondMask(0xffef0fff, 0x0ee10a10, try) // VFP MOV core reg to/from cond codes
    549 	}
    550 }
    551 
    552 // hexCases generates the cases written in hexadecimal in the encoded string.
    553 // Spaces in 'encoded' separate entire test cases, not individual bytes.
    554 func hexCases(t *testing.T, encoded string) func(func([]byte)) {
    555 	return func(try func([]byte)) {
    556 		for _, x := range strings.Fields(encoded) {
    557 			src, err := hex.DecodeString(x)
    558 			if err != nil {
    559 				t.Errorf("parsing %q: %v", x, err)
    560 			}
    561 			try(src)
    562 		}
    563 	}
    564 }
    565 
    566 // testdataCases generates the test cases recorded in testdata/decode.txt.
    567 // It only uses the inputs; it ignores the answers recorded in that file.
    568 func testdataCases(t *testing.T) func(func([]byte)) {
    569 	var codes [][]byte
    570 	data, err := ioutil.ReadFile("testdata/decode.txt")
    571 	if err != nil {
    572 		t.Fatal(err)
    573 	}
    574 	for _, line := range strings.Split(string(data), "\n") {
    575 		line = strings.TrimSpace(line)
    576 		if line == "" || strings.HasPrefix(line, "#") {
    577 			continue
    578 		}
    579 		f := strings.Fields(line)[0]
    580 		i := strings.Index(f, "|")
    581 		if i < 0 {
    582 			t.Errorf("parsing %q: missing | separator", f)
    583 			continue
    584 		}
    585 		if i%2 != 0 {
    586 			t.Errorf("parsing %q: misaligned | separator", f)
    587 		}
    588 		code, err := hex.DecodeString(f[:i] + f[i+1:])
    589 		if err != nil {
    590 			t.Errorf("parsing %q: %v", f, err)
    591 			continue
    592 		}
    593 		codes = append(codes, code)
    594 	}
    595 
    596 	return func(try func([]byte)) {
    597 		for _, code := range codes {
    598 			try(code)
    599 		}
    600 	}
    601 }
    602 
    603 func caller(skip int) string {
    604 	pc, _, _, _ := runtime.Caller(skip)
    605 	f := runtime.FuncForPC(pc)
    606 	name := "?"
    607 	if f != nil {
    608 		name = f.Name()
    609 		if i := strings.LastIndex(name, "."); i >= 0 {
    610 			name = name[i+1:]
    611 		}
    612 	}
    613 	return name
    614 }
    615