Home | History | Annotate | Download | only in objfile
      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package objfile
      6 
      7 import (
      8 	"bufio"
      9 	"bytes"
     10 	"cmd/internal/src"
     11 	"container/list"
     12 	"debug/gosym"
     13 	"encoding/binary"
     14 	"fmt"
     15 	"io"
     16 	"io/ioutil"
     17 	"os"
     18 	"path/filepath"
     19 	"regexp"
     20 	"sort"
     21 	"strings"
     22 	"text/tabwriter"
     23 
     24 	"golang.org/x/arch/arm/armasm"
     25 	"golang.org/x/arch/arm64/arm64asm"
     26 	"golang.org/x/arch/ppc64/ppc64asm"
     27 	"golang.org/x/arch/x86/x86asm"
     28 )
     29 
// Disasm is a disassembler for a given File.
// It bundles the symbols, pc-line table and text bytes of one entry with the
// decoder function and byte order selected for that entry's GOARCH.
type Disasm struct {
	syms      []Sym            // symbols in file, sorted by address
	pcln      Liner            // pcln table
	text      []byte           // bytes of text segment (actual instructions)
	textStart uint64           // start PC of text
	textEnd   uint64           // end PC of text
	goarch    string           // GOARCH string
	disasm    disasmFunc       // disassembler function for goarch
	byteOrder binary.ByteOrder // byte order for goarch
}
     41 
     42 // Disasm returns a disassembler for the file f.
     43 func (e *Entry) Disasm() (*Disasm, error) {
     44 	syms, err := e.Symbols()
     45 	if err != nil {
     46 		return nil, err
     47 	}
     48 
     49 	pcln, err := e.PCLineTable()
     50 	if err != nil {
     51 		return nil, err
     52 	}
     53 
     54 	textStart, textBytes, err := e.Text()
     55 	if err != nil {
     56 		return nil, err
     57 	}
     58 
     59 	goarch := e.GOARCH()
     60 	disasm := disasms[goarch]
     61 	byteOrder := byteOrders[goarch]
     62 	if disasm == nil || byteOrder == nil {
     63 		return nil, fmt.Errorf("unsupported architecture")
     64 	}
     65 
     66 	// Filter out section symbols, overwriting syms in place.
     67 	keep := syms[:0]
     68 	for _, sym := range syms {
     69 		switch sym.Name {
     70 		case "runtime.text", "text", "_text", "runtime.etext", "etext", "_etext":
     71 			// drop
     72 		default:
     73 			keep = append(keep, sym)
     74 		}
     75 	}
     76 	syms = keep
     77 	d := &Disasm{
     78 		syms:      syms,
     79 		pcln:      pcln,
     80 		text:      textBytes,
     81 		textStart: textStart,
     82 		textEnd:   textStart + uint64(len(textBytes)),
     83 		goarch:    goarch,
     84 		disasm:    disasm,
     85 		byteOrder: byteOrder,
     86 	}
     87 
     88 	return d, nil
     89 }
     90 
     91 // lookup finds the symbol name containing addr.
     92 func (d *Disasm) lookup(addr uint64) (name string, base uint64) {
     93 	i := sort.Search(len(d.syms), func(i int) bool { return addr < d.syms[i].Addr })
     94 	if i > 0 {
     95 		s := d.syms[i-1]
     96 		if s.Addr != 0 && s.Addr <= addr && addr < s.Addr+uint64(s.Size) {
     97 			return s.Name, s.Addr
     98 		}
     99 	}
    100 	return "", 0
    101 }
    102 
    103 // base returns the final element in the path.
    104 // It works on both Windows and Unix paths,
    105 // regardless of host operating system.
    106 func base(path string) string {
    107 	path = path[strings.LastIndex(path, "/")+1:]
    108 	path = path[strings.LastIndex(path, `\`)+1:]
    109 	return path
    110 }
    111 
// CachedFile contains the content of a file split into lines.
type CachedFile struct {
	FileName string   // cleaned name the content was read from; compared on cache lookup
	Lines    [][]byte // file content split on '\n'
}
    117 
// FileCache is a simple LRU cache of file contents.
type FileCache struct {
	files  *list.List // *CachedFile values, most recently used at the front
	maxLen int        // size limit given to NewFileCache
}
    123 
    124 // NewFileCache returns a FileCache which can contain up to maxLen cached file contents.
    125 func NewFileCache(maxLen int) *FileCache {
    126 	return &FileCache{
    127 		files:  list.New(),
    128 		maxLen: maxLen,
    129 	}
    130 }
    131 
    132 // Line returns the source code line for the given file and line number.
    133 // If the file is not already cached, reads it , inserts it into the cache,
    134 // and removes the least recently used file if necessary.
    135 // If the file is in cache, moves it up to the front of the list.
    136 func (fc *FileCache) Line(filename string, line int) ([]byte, error) {
    137 	if filepath.Ext(filename) != ".go" {
    138 		return nil, nil
    139 	}
    140 
    141 	// Clean filenames returned by src.Pos.SymFilename()
    142 	// or src.PosBase.SymFilename() removing
    143 	// the leading src.FileSymPrefix.
    144 	if strings.HasPrefix(filename, src.FileSymPrefix) {
    145 		filename = filename[len(src.FileSymPrefix):]
    146 	}
    147 
    148 	// Expand literal "$GOROOT" rewrited by obj.AbsFile()
    149 	filename = filepath.Clean(os.ExpandEnv(filename))
    150 
    151 	var cf *CachedFile
    152 	var e *list.Element
    153 
    154 	for e = fc.files.Front(); e != nil; e = e.Next() {
    155 		cf = e.Value.(*CachedFile)
    156 		if cf.FileName == filename {
    157 			break
    158 		}
    159 	}
    160 
    161 	if e == nil {
    162 		content, err := ioutil.ReadFile(filename)
    163 		if err != nil {
    164 			return nil, err
    165 		}
    166 
    167 		cf = &CachedFile{
    168 			FileName: filename,
    169 			Lines:    bytes.Split(content, []byte{'\n'}),
    170 		}
    171 		fc.files.PushFront(cf)
    172 
    173 		if fc.files.Len() >= fc.maxLen {
    174 			fc.files.Remove(fc.files.Back())
    175 		}
    176 	} else {
    177 		fc.files.MoveToFront(e)
    178 	}
    179 
    180 	return cf.Lines[line-1], nil
    181 }
    182 
    183 // Print prints a disassembly of the file to w.
    184 // If filter is non-nil, the disassembly only includes functions with names matching filter.
    185 // If printCode is true, the disassembly includs corresponding source lines.
    186 // The disassembly only includes functions that overlap the range [start, end).
    187 func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, printCode bool) {
    188 	if start < d.textStart {
    189 		start = d.textStart
    190 	}
    191 	if end > d.textEnd {
    192 		end = d.textEnd
    193 	}
    194 	printed := false
    195 	bw := bufio.NewWriter(w)
    196 
    197 	var fc *FileCache
    198 	if printCode {
    199 		fc = NewFileCache(8)
    200 	}
    201 
    202 	for _, sym := range d.syms {
    203 		symStart := sym.Addr
    204 		symEnd := sym.Addr + uint64(sym.Size)
    205 		relocs := sym.Relocs
    206 		if sym.Code != 'T' && sym.Code != 't' ||
    207 			symStart < d.textStart ||
    208 			symEnd <= start || end <= symStart ||
    209 			filter != nil && !filter.MatchString(sym.Name) {
    210 			continue
    211 		}
    212 		if printed {
    213 			fmt.Fprintf(bw, "\n")
    214 		}
    215 		printed = true
    216 
    217 		file, _, _ := d.pcln.PCToLine(sym.Addr)
    218 		fmt.Fprintf(bw, "TEXT %s(SB) %s\n", sym.Name, file)
    219 
    220 		tw := tabwriter.NewWriter(bw, 18, 8, 1, '\t', tabwriter.StripEscape)
    221 		if symEnd > end {
    222 			symEnd = end
    223 		}
    224 		code := d.text[:end-d.textStart]
    225 
    226 		var lastFile string
    227 		var lastLine int
    228 
    229 		d.Decode(symStart, symEnd, relocs, func(pc, size uint64, file string, line int, text string) {
    230 			i := pc - d.textStart
    231 
    232 			if printCode {
    233 				if file != lastFile || line != lastLine {
    234 					if srcLine, err := fc.Line(file, line); err == nil {
    235 						fmt.Fprintf(tw, "%s%s%s\n", []byte{tabwriter.Escape}, srcLine, []byte{tabwriter.Escape})
    236 					}
    237 
    238 					lastFile, lastLine = file, line
    239 				}
    240 
    241 				fmt.Fprintf(tw, "  %#x\t", pc)
    242 			} else {
    243 				fmt.Fprintf(tw, "  %s:%d\t%#x\t", base(file), line, pc)
    244 			}
    245 
    246 			if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" || d.goarch == "amd64p32" {
    247 				// Print instruction as bytes.
    248 				fmt.Fprintf(tw, "%x", code[i:i+size])
    249 			} else {
    250 				// Print instruction as 32-bit words.
    251 				for j := uint64(0); j < size; j += 4 {
    252 					if j > 0 {
    253 						fmt.Fprintf(tw, " ")
    254 					}
    255 					fmt.Fprintf(tw, "%08x", d.byteOrder.Uint32(code[i+j:]))
    256 				}
    257 			}
    258 			fmt.Fprintf(tw, "\t%s\n", text)
    259 		})
    260 		tw.Flush()
    261 	}
    262 	bw.Flush()
    263 }
    264 
    265 // Decode disassembles the text segment range [start, end), calling f for each instruction.
    266 func (d *Disasm) Decode(start, end uint64, relocs []Reloc, f func(pc, size uint64, file string, line int, text string)) {
    267 	if start < d.textStart {
    268 		start = d.textStart
    269 	}
    270 	if end > d.textEnd {
    271 		end = d.textEnd
    272 	}
    273 	code := d.text[:end-d.textStart]
    274 	lookup := d.lookup
    275 	for pc := start; pc < end; {
    276 		i := pc - d.textStart
    277 		text, size := d.disasm(code[i:], pc, lookup, d.byteOrder)
    278 		file, line, _ := d.pcln.PCToLine(pc)
    279 		text += "\t"
    280 		first := true
    281 		for len(relocs) > 0 && relocs[0].Addr < i+uint64(size) {
    282 			if first {
    283 				first = false
    284 			} else {
    285 				text += " "
    286 			}
    287 			text += relocs[0].Stringer.String(pc - start)
    288 			relocs = relocs[1:]
    289 		}
    290 		f(pc, uint64(size), file, line, text)
    291 		pc += uint64(size)
    292 	}
    293 }
    294 
// lookupFunc resolves an address to the name and base address of a symbol.
// Decode passes Disasm.lookup as the lookupFunc given to the decoders.
type lookupFunc = func(addr uint64) (sym string, base uint64)
// disasmFunc decodes the instruction at the start of code, which is located
// at address pc, and returns its text and its size in bytes. lookup is used
// to resolve addresses to symbols and ord is the byte order of the architecture.
type disasmFunc func(code []byte, pc uint64, lookup lookupFunc, ord binary.ByteOrder) (text string, size int)
    297 
    298 func disasm_386(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder) (string, int) {
    299 	return disasm_x86(code, pc, lookup, 32)
    300 }
    301 
    302 func disasm_amd64(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder) (string, int) {
    303 	return disasm_x86(code, pc, lookup, 64)
    304 }
    305 
    306 func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int) (string, int) {
    307 	inst, err := x86asm.Decode(code, arch)
    308 	var text string
    309 	size := inst.Len
    310 	if err != nil || size == 0 || inst.Op == 0 {
    311 		size = 1
    312 		text = "?"
    313 	} else {
    314 		text = x86asm.GoSyntax(inst, pc, lookup)
    315 	}
    316 	return text, size
    317 }
    318 
    319 type textReader struct {
    320 	code []byte
    321 	pc   uint64
    322 }
    323 
    324 func (r textReader) ReadAt(data []byte, off int64) (n int, err error) {
    325 	if off < 0 || uint64(off) < r.pc {
    326 		return 0, io.EOF
    327 	}
    328 	d := uint64(off) - r.pc
    329 	if d >= uint64(len(r.code)) {
    330 		return 0, io.EOF
    331 	}
    332 	n = copy(data, r.code[d:])
    333 	if n < len(data) {
    334 		err = io.ErrUnexpectedEOF
    335 	}
    336 	return
    337 }
    338 
    339 func disasm_arm(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder) (string, int) {
    340 	inst, err := armasm.Decode(code, armasm.ModeARM)
    341 	var text string
    342 	size := inst.Len
    343 	if err != nil || size == 0 || inst.Op == 0 {
    344 		size = 4
    345 		text = "?"
    346 	} else {
    347 		text = armasm.GoSyntax(inst, pc, lookup, textReader{code, pc})
    348 	}
    349 	return text, size
    350 }
    351 
    352 func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder) (string, int) {
    353 	inst, err := arm64asm.Decode(code)
    354 	var text string
    355 	if err != nil || inst.Op == 0 {
    356 		text = "?"
    357 	} else {
    358 		text = arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc})
    359 	}
    360 	return text, 4
    361 }
    362 
    363 func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder) (string, int) {
    364 	inst, err := ppc64asm.Decode(code, byteOrder)
    365 	var text string
    366 	size := inst.Len
    367 	if err != nil || size == 0 || inst.Op == 0 {
    368 		size = 4
    369 		text = "?"
    370 	} else {
    371 		text = ppc64asm.GoSyntax(inst, pc, lookup)
    372 	}
    373 	return text, size
    374 }
    375 
// disasms maps a GOARCH name to the function that decodes its instructions.
// Entry.Disasm reports an architecture as unsupported when it has no entry here.
var disasms = map[string]disasmFunc{
	"386":      disasm_386,
	"amd64":    disasm_amd64,
	"amd64p32": disasm_amd64,
	"arm":      disasm_arm,
	"arm64":    disasm_arm64,
	"ppc64":    disasm_ppc64,
	"ppc64le":  disasm_ppc64,
}
    385 
// byteOrders maps a GOARCH name to the byte order of that architecture.
// Entry.Disasm reports an architecture as unsupported when it has no entry here.
// Note that "s390x" is listed although disasms has no decoder for it.
var byteOrders = map[string]binary.ByteOrder{
	"386":      binary.LittleEndian,
	"amd64":    binary.LittleEndian,
	"amd64p32": binary.LittleEndian,
	"arm":      binary.LittleEndian,
	"arm64":    binary.LittleEndian,
	"ppc64":    binary.BigEndian,
	"ppc64le":  binary.LittleEndian,
	"s390x":    binary.BigEndian,
}
    396 
// Liner maps a program counter to a source position.
// Disasm uses it to label symbols and instructions with their file and line.
type Liner interface {
	// PCToLine returns the file, line, and function data corresponding to
	// the given pc. If unknown, it returns "",0,nil.
	PCToLine(uint64) (string, int, *gosym.Func)
}
    402