1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Support for testing against external disassembler program. 6 // Copied and simplified from rsc.io/arm/armasm/ext_test.go. 7 8 package ppc64asm 9 10 import ( 11 "bufio" 12 "bytes" 13 "encoding/binary" 14 "encoding/hex" 15 "flag" 16 "fmt" 17 "io/ioutil" 18 "log" 19 "math/rand" 20 "os" 21 "os/exec" 22 "regexp" 23 "runtime" 24 "strings" 25 "testing" 26 "time" 27 ) 28 29 var ( 30 printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths") 31 dumpTest = flag.Bool("dump", false, "dump all encodings") 32 mismatch = flag.Bool("mismatch", false, "log allowed mismatches") 33 longTest = flag.Bool("long", false, "long test") 34 keep = flag.Bool("keep", false, "keep object files around") 35 debug = false 36 ) 37 38 // An ExtInst represents a single decoded instruction parsed 39 // from an external disassembler's output. 40 type ExtInst struct { 41 addr uint32 42 enc [4]byte 43 nenc int 44 text string 45 } 46 47 func (r ExtInst) String() string { 48 return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) 49 } 50 51 // An ExtDis is a connection between an external disassembler and a test. 52 type ExtDis struct { 53 Dec chan ExtInst 54 File *os.File 55 Size int 56 KeepFile bool 57 Cmd *exec.Cmd 58 } 59 60 // Run runs the given command - the external disassembler - and returns 61 // a buffered reader of its standard output. 62 func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { 63 if *keep { 64 log.Printf("%s\n", strings.Join(cmd, " ")) 65 } 66 ext.Cmd = exec.Command(cmd[0], cmd[1:]...) 
67 out, err := ext.Cmd.StdoutPipe() 68 if err != nil { 69 return nil, fmt.Errorf("stdoutpipe: %v", err) 70 } 71 if err := ext.Cmd.Start(); err != nil { 72 return nil, fmt.Errorf("exec: %v", err) 73 } 74 75 b := bufio.NewReaderSize(out, 1<<20) 76 return b, nil 77 } 78 79 // Wait waits for the command started with Run to exit. 80 func (ext *ExtDis) Wait() error { 81 return ext.Cmd.Wait() 82 } 83 84 // testExtDis tests a set of byte sequences against an external disassembler. 85 // The disassembler is expected to produce the given syntax and be run 86 // in the given architecture mode (16, 32, or 64-bit). 87 // The extdis function must start the external disassembler 88 // and then parse its output, sending the parsed instructions on ext.Dec. 89 // The generate function calls its argument f once for each byte sequence 90 // to be tested. The generate function itself will be called twice, and it must 91 // make the same sequence of calls to f each time. 92 // When a disassembly does not match the internal decoding, 93 // allowedMismatch determines whether this mismatch should be 94 // allowed, or else considered an error. 95 func testExtDis( 96 t *testing.T, 97 syntax string, 98 extdis func(ext *ExtDis) error, 99 generate func(f func([]byte)), 100 allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool, 101 ) { 102 start := time.Now() 103 ext := &ExtDis{ 104 Dec: make(chan ExtInst), 105 } 106 errc := make(chan error) 107 108 // First pass: write instructions to input file for external disassembler. 109 file, f, size, err := writeInst(generate) 110 if err != nil { 111 t.Fatal(err) 112 } 113 ext.Size = size 114 ext.File = f 115 defer func() { 116 f.Close() 117 if !*keep { 118 os.Remove(file) 119 } 120 }() 121 122 // Second pass: compare disassembly against our decodings. 
123 var ( 124 totalTests = 0 125 totalSkips = 0 126 totalErrors = 0 127 128 errors = make([]string, 0, 100) // sampled errors, at most cap 129 ) 130 go func() { 131 errc <- extdis(ext) 132 }() 133 generate(func(enc []byte) { 134 dec, ok := <-ext.Dec 135 if !ok { 136 t.Errorf("decoding stream ended early") 137 return 138 } 139 inst, text := disasm(syntax, pad(enc)) 140 totalTests++ 141 if *dumpTest { 142 fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) 143 } 144 if text != dec.text || inst.Len != dec.nenc { 145 suffix := "" 146 if allowedMismatch(text, size, &inst, dec) { 147 totalSkips++ 148 if !*mismatch { 149 return 150 } 151 suffix += " (allowed mismatch)" 152 } 153 totalErrors++ 154 if len(errors) >= cap(errors) { 155 j := rand.Intn(totalErrors) 156 if j >= cap(errors) { 157 return 158 } 159 errors = append(errors[:j], errors[j+1:]...) 160 } 161 errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix)) 162 } 163 }) 164 165 if *mismatch { 166 totalErrors -= totalSkips 167 } 168 169 for _, b := range errors { 170 t.Log(b) 171 } 172 173 if totalErrors > 0 { 174 t.Fail() 175 } 176 t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) 177 178 if err := <-errc; err != nil { 179 t.Fatalf("external disassembler: %v", err) 180 } 181 182 } 183 184 const start = 0x8000 // start address of text 185 186 // writeInst writes the generated byte sequences to a new file 187 // starting at offset start. That file is intended to be the input to 188 // the external disassembler. 
189 func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { 190 f, err = ioutil.TempFile("", "ppc64asm") 191 if err != nil { 192 return 193 } 194 195 file = f.Name() 196 197 f.Seek(start, 0) 198 w := bufio.NewWriter(f) 199 defer w.Flush() 200 size = 0 201 generate(func(x []byte) { 202 if len(x) > 4 { 203 x = x[:4] 204 } 205 if debug { 206 fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) 207 } 208 w.Write(x) 209 w.Write(zeros[len(x):]) 210 size += len(zeros) 211 }) 212 return file, f, size, nil 213 } 214 215 var zeros = []byte{0, 0, 0, 0} 216 217 // pad pads the code sequence with pops. 218 func pad(enc []byte) []byte { 219 if len(enc) < 4 { 220 enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) 221 } 222 return enc 223 } 224 225 // disasm returns the decoded instruction and text 226 // for the given source bytes, using the given syntax and mode. 227 func disasm(syntax string, src []byte) (inst Inst, text string) { 228 // If printTests is set, we record the coverage value 229 // before and after, and we write out the inputs for which 230 // coverage went up, in the format expected in testdata/decode.text. 231 // This produces a fairly small set of test cases that exercise nearly 232 // all the code. 
233 var cover float64 234 if *printTests { 235 cover -= coverage() 236 } 237 238 inst, err := Decode(src, binary.BigEndian) 239 if err != nil { 240 text = "error: " + err.Error() 241 } else { 242 text = inst.String() 243 switch syntax { 244 //case "arm": 245 // text = ARMSyntax(inst) 246 case "gnu": 247 text = GNUSyntax(inst) 248 //case "plan9": 249 // text = GoSyntax(inst, 0, nil) 250 default: 251 text = "error: unknown syntax " + syntax 252 } 253 } 254 255 if *printTests { 256 cover += coverage() 257 if cover > 0 { 258 max := len(src) 259 if max > 4 && inst.Len <= 4 { 260 max = 4 261 } 262 fmt.Printf("%x|%x\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], syntax, text) 263 } 264 } 265 266 return 267 } 268 269 // coverage returns a floating point number denoting the 270 // test coverage until now. The number increases when new code paths are exercised, 271 // both in the Go program and in the decoder byte code. 272 func coverage() float64 { 273 var f float64 274 f += testing.Coverage() 275 f += decodeCoverage() 276 return f 277 } 278 279 func decodeCoverage() float64 { 280 n := 0 281 for _, t := range decoderCover { 282 if t { 283 n++ 284 } 285 } 286 return float64(1+n) / float64(1+len(decoderCover)) 287 } 288 289 // Helpers for writing disassembler output parsers. 290 291 // hasPrefix reports whether any of the space-separated words in the text s 292 // begins with any of the given prefixes. 293 func hasPrefix(s string, prefixes ...string) bool { 294 for _, prefix := range prefixes { 295 for s := s; s != ""; { 296 if strings.HasPrefix(s, prefix) { 297 return true 298 } 299 i := strings.Index(s, " ") 300 if i < 0 { 301 break 302 } 303 s = s[i+1:] 304 } 305 } 306 return false 307 } 308 309 // contains reports whether the text s contains any of the given substrings. 
func contains(s string, substrings ...string) bool {
	for _, sub := range substrings {
		if strings.Contains(s, sub) {
			return true
		}
	}
	return false
}

// isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
// '0' is tested explicitly because its entry in unhex is zero,
// the same as the entry of every non-hex byte.
func isHex(b byte) bool { return b == '0' || unhex[b] > 0 }

// parseHex parses the hexadecimal byte dump in hex,
// appending the parsed bytes to raw and returning the updated slice.
// The returned bool is true when the whole dump was valid hex and
// false (with a nil slice) when any invalid hex was found.
// Spaces and tabs between bytes are okay but any other non-hex is not.
// Trailing spaces, tabs and newlines are ignored.
func parseHex(hex []byte, raw []byte) ([]byte, bool) {
	hex = bytes.TrimRight(hex, " \t\n")
	for j := 0; j < len(hex); {
		if hex[j] == ' ' || hex[j] == '\t' {
			j++
			continue
		}
		// A byte needs two hex digits; a lone trailing digit is an error.
		if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
			return nil, false
		}
		raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]])
		j += 2
	}
	return raw, true
}

// unhex maps a hexadecimal digit character to its value.
// Entries for all other bytes are zero.
var unhex = [256]byte{
	'0': 0,
	'1': 1,
	'2': 2,
	'3': 3,
	'4': 4,
	'5': 5,
	'6': 6,
	'7': 7,
	'8': 8,
	'9': 9,
	'A': 10,
	'B': 11,
	'C': 12,
	'D': 13,
	'E': 14,
	'F': 15,
	'a': 10,
	'b': 11,
	'c': 12,
	'd': 13,
	'e': 14,
	'f': 15,
}

// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
// Like bytes.Index, it returns 0 for an empty t and -1 when t is absent.
func index(s []byte, t string) int {
	if t == "" {
		return 0
	}
	for i := 0; i+len(t) <= len(s); i++ {
		// Jump to the next candidate position: the next occurrence
		// of t's first byte.
		j := bytes.IndexByte(s[i:], t[0])
		if j < 0 {
			return -1
		}
		i += j
		if i+len(t) > len(s) {
			return -1
		}
		// A string conversion used only in a comparison does not allocate.
		if string(s[i:i+len(t)]) == t {
			return i
		}
	}
	return -1
}

// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
// If s must be rewritten, it is rewritten in place.
394 func fixSpace(s []byte) []byte { 395 s = trimSpace(s) 396 for i := 0; i < len(s); i++ { 397 if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { 398 goto Fix 399 } 400 } 401 return s 402 403 Fix: 404 b := s 405 w := 0 406 for i := 0; i < len(s); i++ { 407 c := s[i] 408 if c == '\t' || c == '\n' { 409 c = ' ' 410 } 411 if c == ' ' && w > 0 && b[w-1] == ' ' { 412 continue 413 } 414 b[w] = c 415 w++ 416 } 417 if w > 0 && b[w-1] == ' ' { 418 w-- 419 } 420 return b[:w] 421 } 422 423 // trimSpace trims leading and trailing space from s, returning a subslice of s. 424 func trimSpace(s []byte) []byte { 425 j := len(s) 426 for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') { 427 j-- 428 } 429 i := 0 430 for i < j && (s[i] == ' ' || s[i] == '\t') { 431 i++ 432 } 433 return s[i:j] 434 } 435 436 // pcrel matches instructions using relative addressing mode. 437 var ( 438 pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bc)[^ac ]* (?:(?:[0-9]{1,2},)|(?:[0-7]\*)|\+|lt|gt|eq|so|cr[0-7]|,)*)0x([0-9a-f]+)$`) 439 ) 440 441 // Generators. 442 // 443 // The test cases are described as functions that invoke a callback repeatedly, 444 // with a new input sequence each time. These helpers make writing those 445 // a little easier. 446 447 // randomCases generates random instructions. 448 func randomCases(t *testing.T) func(func([]byte)) { 449 return func(try func([]byte)) { 450 // All the strides are relatively prime to 2 and therefore to 2, 451 // so we will not repeat any instructions until we have tried all 2. 452 // Using a stride other than 1 is meant to visit the instructions in a 453 // pseudorandom order, which gives better variety in the set of 454 // test cases chosen by -printtests. 
455 stride := uint32(10007) 456 n := 1 << 28 / 7 457 if testing.Short() { 458 stride = 100003 459 n = 1 << 28 / 1001 460 } else if *longTest { 461 stride = 2000033 462 n = 1 << 29 463 } 464 x := uint32(0) 465 for i := 0; i < n; i++ { 466 enc := (x%15)<<28 | x&(1<<28-1) 467 try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)}) 468 x += stride 469 } 470 } 471 } 472 473 // hexCases generates the cases written in hexadecimal in the encoded string. 474 // Spaces in 'encoded' separate entire test cases, not individual bytes. 475 func hexCases(t *testing.T, encoded string) func(func([]byte)) { 476 return func(try func([]byte)) { 477 for _, x := range strings.Fields(encoded) { 478 src, err := hex.DecodeString(x) 479 if err != nil { 480 t.Errorf("parsing %q: %v", x, err) 481 } 482 try(src) 483 } 484 } 485 } 486 487 // testdataCases generates the test cases recorded in testdata/decode.txt. 488 // It only uses the inputs; it ignores the answers recorded in that file. 489 func testdataCases(t *testing.T) func(func([]byte)) { 490 var codes [][]byte 491 data, err := ioutil.ReadFile("testdata/decode.txt") 492 if err != nil { 493 t.Fatal(err) 494 } 495 for _, line := range strings.Split(string(data), "\n") { 496 line = strings.TrimSpace(line) 497 if line == "" || strings.HasPrefix(line, "#") { 498 continue 499 } 500 f := strings.Fields(line)[0] 501 i := strings.Index(f, "|") 502 if i < 0 { 503 t.Errorf("parsing %q: missing | separator", f) 504 continue 505 } 506 if i%2 != 0 { 507 t.Errorf("parsing %q: misaligned | separator", f) 508 } 509 code, err := hex.DecodeString(f[:i] + f[i+1:]) 510 if err != nil { 511 t.Errorf("parsing %q: %v", f, err) 512 continue 513 } 514 codes = append(codes, code) 515 } 516 517 return func(try func([]byte)) { 518 for _, code := range codes { 519 try(code) 520 } 521 } 522 } 523 524 func caller(skip int) string { 525 pc, _, _, _ := runtime.Caller(skip) 526 f := runtime.FuncForPC(pc) 527 name := "?" 
528 if f != nil { 529 name = f.Name() 530 if i := strings.LastIndex(name, "."); i >= 0 { 531 name = name[i+1:] 532 } 533 } 534 return name 535 } 536