Home | History | Annotate | Download | only in objfile
      1 // Copyright 2014 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package objfile
      6 
      7 import (
      8 	"bufio"
      9 	"debug/gosym"
     10 	"encoding/binary"
     11 	"fmt"
     12 	"io"
     13 	"regexp"
     14 	"sort"
     15 	"strings"
     16 	"text/tabwriter"
     17 
     18 	"cmd/internal/rsc.io/arm/armasm"
     19 	"cmd/internal/rsc.io/x86/x86asm"
     20 )
     21 
     22 // Disasm is a disassembler for a given File.
     23 type Disasm struct {
     24 	syms      []Sym            //symbols in file, sorted by address
     25 	pcln      *gosym.Table     // pcln table
     26 	text      []byte           // bytes of text segment (actual instructions)
     27 	textStart uint64           // start PC of text
     28 	textEnd   uint64           // end PC of text
     29 	goarch    string           // GOARCH string
     30 	disasm    disasmFunc       // disassembler function for goarch
     31 	byteOrder binary.ByteOrder // byte order for goarch
     32 }
     33 
     34 // Disasm returns a disassembler for the file f.
     35 func (f *File) Disasm() (*Disasm, error) {
     36 	syms, err := f.Symbols()
     37 	if err != nil {
     38 		return nil, err
     39 	}
     40 
     41 	pcln, err := f.PCLineTable()
     42 	if err != nil {
     43 		return nil, err
     44 	}
     45 
     46 	textStart, textBytes, err := f.Text()
     47 	if err != nil {
     48 		return nil, err
     49 	}
     50 
     51 	goarch := f.GOARCH()
     52 	disasm := disasms[goarch]
     53 	byteOrder := byteOrders[goarch]
     54 	if disasm == nil || byteOrder == nil {
     55 		return nil, fmt.Errorf("unsupported architecture")
     56 	}
     57 
     58 	// Filter out section symbols, overwriting syms in place.
     59 	keep := syms[:0]
     60 	for _, sym := range syms {
     61 		switch sym.Name {
     62 		case "runtime.text", "text", "_text", "runtime.etext", "etext", "_etext":
     63 			// drop
     64 		default:
     65 			keep = append(keep, sym)
     66 		}
     67 	}
     68 	syms = keep
     69 	d := &Disasm{
     70 		syms:      syms,
     71 		pcln:      pcln,
     72 		text:      textBytes,
     73 		textStart: textStart,
     74 		textEnd:   textStart + uint64(len(textBytes)),
     75 		goarch:    goarch,
     76 		disasm:    disasm,
     77 		byteOrder: byteOrder,
     78 	}
     79 
     80 	return d, nil
     81 }
     82 
     83 // lookup finds the symbol name containing addr.
     84 func (d *Disasm) lookup(addr uint64) (name string, base uint64) {
     85 	i := sort.Search(len(d.syms), func(i int) bool { return addr < d.syms[i].Addr })
     86 	if i > 0 {
     87 		s := d.syms[i-1]
     88 		if s.Addr != 0 && s.Addr <= addr && addr < s.Addr+uint64(s.Size) {
     89 			return s.Name, s.Addr
     90 		}
     91 	}
     92 	return "", 0
     93 }
     94 
     95 // base returns the final element in the path.
     96 // It works on both Windows and Unix paths,
     97 // regardless of host operating system.
     98 func base(path string) string {
     99 	path = path[strings.LastIndex(path, "/")+1:]
    100 	path = path[strings.LastIndex(path, `\`)+1:]
    101 	return path
    102 }
    103 
    104 // Print prints a disassembly of the file to w.
    105 // If filter is non-nil, the disassembly only includes functions with names matching filter.
    106 // The disassembly only includes functions that overlap the range [start, end).
    107 func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) {
    108 	if start < d.textStart {
    109 		start = d.textStart
    110 	}
    111 	if end > d.textEnd {
    112 		end = d.textEnd
    113 	}
    114 	printed := false
    115 	bw := bufio.NewWriter(w)
    116 	for _, sym := range d.syms {
    117 		symStart := sym.Addr
    118 		symEnd := sym.Addr + uint64(sym.Size)
    119 		if sym.Code != 'T' && sym.Code != 't' ||
    120 			symStart < d.textStart ||
    121 			symEnd <= start || end <= symStart ||
    122 			filter != nil && !filter.MatchString(sym.Name) {
    123 			continue
    124 		}
    125 		if printed {
    126 			fmt.Fprintf(bw, "\n")
    127 		}
    128 		printed = true
    129 
    130 		file, _, _ := d.pcln.PCToLine(sym.Addr)
    131 		fmt.Fprintf(bw, "TEXT %s(SB) %s\n", sym.Name, file)
    132 
    133 		tw := tabwriter.NewWriter(bw, 1, 8, 1, '\t', 0)
    134 		if symEnd > end {
    135 			symEnd = end
    136 		}
    137 		code := d.text[:end-d.textStart]
    138 		d.Decode(symStart, symEnd, func(pc, size uint64, file string, line int, text string) {
    139 			i := pc - d.textStart
    140 			fmt.Fprintf(tw, "\t%s:%d\t%#x\t", base(file), line, pc)
    141 			if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" {
    142 				// Print instruction as bytes.
    143 				fmt.Fprintf(tw, "%x", code[i:i+size])
    144 			} else {
    145 				// Print instruction as 32-bit words.
    146 				for j := uint64(0); j < size; j += 4 {
    147 					if j > 0 {
    148 						fmt.Fprintf(tw, " ")
    149 					}
    150 					fmt.Fprintf(tw, "%08x", d.byteOrder.Uint32(code[i+j:]))
    151 				}
    152 			}
    153 			fmt.Fprintf(tw, "\t%s\n", text)
    154 		})
    155 		tw.Flush()
    156 	}
    157 	bw.Flush()
    158 }
    159 
    160 // Decode disassembles the text segment range [start, end), calling f for each instruction.
    161 func (d *Disasm) Decode(start, end uint64, f func(pc, size uint64, file string, line int, text string)) {
    162 	if start < d.textStart {
    163 		start = d.textStart
    164 	}
    165 	if end > d.textEnd {
    166 		end = d.textEnd
    167 	}
    168 	code := d.text[:end-d.textStart]
    169 	lookup := d.lookup
    170 	for pc := start; pc < end; {
    171 		i := pc - d.textStart
    172 		text, size := d.disasm(code[i:], pc, lookup)
    173 		file, line, _ := d.pcln.PCToLine(pc)
    174 		f(pc, uint64(size), file, line, text)
    175 		pc += uint64(size)
    176 	}
    177 }
    178 
    179 type lookupFunc func(addr uint64) (sym string, base uint64)
    180 type disasmFunc func(code []byte, pc uint64, lookup lookupFunc) (text string, size int)
    181 
    182 func disasm_386(code []byte, pc uint64, lookup lookupFunc) (string, int) {
    183 	return disasm_x86(code, pc, lookup, 32)
    184 }
    185 
    186 func disasm_amd64(code []byte, pc uint64, lookup lookupFunc) (string, int) {
    187 	return disasm_x86(code, pc, lookup, 64)
    188 }
    189 
    190 func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int) (string, int) {
    191 	inst, err := x86asm.Decode(code, 64)
    192 	var text string
    193 	size := inst.Len
    194 	if err != nil || size == 0 || inst.Op == 0 {
    195 		size = 1
    196 		text = "?"
    197 	} else {
    198 		text = x86asm.Plan9Syntax(inst, pc, lookup)
    199 	}
    200 	return text, size
    201 }
    202 
    203 type textReader struct {
    204 	code []byte
    205 	pc   uint64
    206 }
    207 
    208 func (r textReader) ReadAt(data []byte, off int64) (n int, err error) {
    209 	if off < 0 || uint64(off) < r.pc {
    210 		return 0, io.EOF
    211 	}
    212 	d := uint64(off) - r.pc
    213 	if d >= uint64(len(r.code)) {
    214 		return 0, io.EOF
    215 	}
    216 	n = copy(data, r.code[d:])
    217 	if n < len(data) {
    218 		err = io.ErrUnexpectedEOF
    219 	}
    220 	return
    221 }
    222 
    223 func disasm_arm(code []byte, pc uint64, lookup lookupFunc) (string, int) {
    224 	inst, err := armasm.Decode(code, armasm.ModeARM)
    225 	var text string
    226 	size := inst.Len
    227 	if err != nil || size == 0 || inst.Op == 0 {
    228 		size = 4
    229 		text = "?"
    230 	} else {
    231 		text = armasm.Plan9Syntax(inst, pc, lookup, textReader{code, pc})
    232 	}
    233 	return text, size
    234 }
    235 
    236 var disasms = map[string]disasmFunc{
    237 	"386":   disasm_386,
    238 	"amd64": disasm_amd64,
    239 	"arm":   disasm_arm,
    240 }
    241 
    242 var byteOrders = map[string]binary.ByteOrder{
    243 	"386":     binary.LittleEndian,
    244 	"amd64":   binary.LittleEndian,
    245 	"arm":     binary.LittleEndian,
    246 	"ppc64":   binary.BigEndian,
    247 	"ppc64le": binary.LittleEndian,
    248 }
    249