Home | History | Annotate | Download | only in binutils
      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Package binutils provides access to the GNU binutils.
     16 package binutils
     17 
     18 import (
     19 	"debug/elf"
     20 	"debug/macho"
     21 	"fmt"
     22 	"os"
     23 	"os/exec"
     24 	"path/filepath"
     25 	"regexp"
     26 	"strings"
     27 	"sync"
     28 
     29 	"github.com/google/pprof/internal/elfexec"
     30 	"github.com/google/pprof/internal/plugin"
     31 )
     32 
// A Binutils implements plugin.ObjTool by invoking the GNU binutils.
//
// Its configuration is held in rep, which is built lazily by get or update
// and replaced by a fresh copy on each update call.
type Binutils struct {
	mu  sync.Mutex // held by get and update while they read or replace rep
	rep *binrep    // current configuration; nil until get or update first runs
}
     38 
// binrep is an immutable representation for Binutils.  It is atomically
// replaced on every mutation to provide thread-safe access.
type binrep struct {
	// Commands to invoke. Each command is paired with a Found flag that
	// initTools sets from findExe: true when the tool was located on the
	// configured paths, false when the command is just the bare tool name.
	llvmSymbolizer      string
	llvmSymbolizerFound bool
	addr2line           string
	addr2lineFound      bool
	nm                  string
	nmFound             bool
	objdump             string
	objdumpFound        bool

	// if fast, perform symbolization using nm (symbol names only),
	// instead of file-line detail from the slower addr2line.
	fast bool
}
     56 
     57 // get returns the current representation for bu, initializing it if necessary.
     58 func (bu *Binutils) get() *binrep {
     59 	bu.mu.Lock()
     60 	r := bu.rep
     61 	if r == nil {
     62 		r = &binrep{}
     63 		initTools(r, "")
     64 		bu.rep = r
     65 	}
     66 	bu.mu.Unlock()
     67 	return r
     68 }
     69 
     70 // update modifies the rep for bu via the supplied function.
     71 func (bu *Binutils) update(fn func(r *binrep)) {
     72 	r := &binrep{}
     73 	bu.mu.Lock()
     74 	defer bu.mu.Unlock()
     75 	if bu.rep == nil {
     76 		initTools(r, "")
     77 	} else {
     78 		*r = *bu.rep
     79 	}
     80 	fn(r)
     81 	bu.rep = r
     82 }
     83 
     84 // SetFastSymbolization sets a toggle that makes binutils use fast
     85 // symbolization (using nm), which is much faster than addr2line but
     86 // provides only symbol name information (no file/line).
     87 func (bu *Binutils) SetFastSymbolization(fast bool) {
     88 	bu.update(func(r *binrep) { r.fast = fast })
     89 }
     90 
     91 // SetTools processes the contents of the tools option. It
     92 // expects a set of entries separated by commas; each entry is a pair
     93 // of the form t:path, where cmd will be used to look only for the
     94 // tool named t. If t is not specified, the path is searched for all
     95 // tools.
     96 func (bu *Binutils) SetTools(config string) {
     97 	bu.update(func(r *binrep) { initTools(r, config) })
     98 }
     99 
    100 func initTools(b *binrep, config string) {
    101 	// paths collect paths per tool; Key "" contains the default.
    102 	paths := make(map[string][]string)
    103 	for _, t := range strings.Split(config, ",") {
    104 		name, path := "", t
    105 		if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
    106 			name, path = ct[0], ct[1]
    107 		}
    108 		paths[name] = append(paths[name], path)
    109 	}
    110 
    111 	defaultPath := paths[""]
    112 	b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
    113 	b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
    114 	b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
    115 	b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
    116 }
    117 
    118 // findExe looks for an executable command on a set of paths.
    119 // If it cannot find it, returns cmd.
    120 func findExe(cmd string, paths []string) (string, bool) {
    121 	for _, p := range paths {
    122 		cp := filepath.Join(p, cmd)
    123 		if c, err := exec.LookPath(cp); err == nil {
    124 			return c, true
    125 		}
    126 	}
    127 	return cmd, false
    128 }
    129 
    130 // Disasm returns the assembly instructions for the specified address range
    131 // of a binary.
    132 func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
    133 	b := bu.get()
    134 	cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
    135 		fmt.Sprintf("--start-address=%#x", start),
    136 		fmt.Sprintf("--stop-address=%#x", end),
    137 		file)
    138 	out, err := cmd.Output()
    139 	if err != nil {
    140 		return nil, fmt.Errorf("%v: %v", cmd.Args, err)
    141 	}
    142 
    143 	return disassemble(out)
    144 }
    145 
    146 // Open satisfies the plugin.ObjTool interface.
    147 func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
    148 	b := bu.get()
    149 
    150 	// Make sure file is a supported executable.
    151 	// The pprof driver uses Open to sniff the difference
    152 	// between an executable and a profile.
    153 	// For now, only ELF is supported.
    154 	// Could read the first few bytes of the file and
    155 	// use a table of prefixes if we need to support other
    156 	// systems at some point.
    157 
    158 	if _, err := os.Stat(name); err != nil {
    159 		// For testing, do not require file name to exist.
    160 		if strings.Contains(b.addr2line, "testdata/") {
    161 			return &fileAddr2Line{file: file{b: b, name: name}}, nil
    162 		}
    163 		return nil, err
    164 	}
    165 
    166 	if f, err := b.openELF(name, start, limit, offset); err == nil {
    167 		return f, nil
    168 	}
    169 	if f, err := b.openMachO(name, start, limit, offset); err == nil {
    170 		return f, nil
    171 	}
    172 	return nil, fmt.Errorf("unrecognized binary: %s", name)
    173 }
    174 
    175 func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
    176 	of, err := macho.Open(name)
    177 	if err != nil {
    178 		return nil, fmt.Errorf("Parsing %s: %v", name, err)
    179 	}
    180 	defer of.Close()
    181 
    182 	if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
    183 		return &fileNM{file: file{b: b, name: name}}, nil
    184 	}
    185 	return &fileAddr2Line{file: file{b: b, name: name}}, nil
    186 }
    187 
    188 func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
    189 	ef, err := elf.Open(name)
    190 	if err != nil {
    191 		return nil, fmt.Errorf("Parsing %s: %v", name, err)
    192 	}
    193 	defer ef.Close()
    194 
    195 	var stextOffset *uint64
    196 	var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
    197 	if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
    198 		// Reading all Symbols is expensive, and we only rarely need it so
    199 		// we don't want to do it every time. But if _stext happens to be
    200 		// page-aligned but isn't the same as Vaddr, we would symbolize
    201 		// wrong. So if the name the addresses aren't page aligned, or if
    202 		// the name is "vmlinux" we read _stext. We can be wrong if: (1)
    203 		// someone passes a kernel path that doesn't contain "vmlinux" AND
    204 		// (2) _stext is page-aligned AND (3) _stext is not at Vaddr
    205 		symbols, err := ef.Symbols()
    206 		if err != nil {
    207 			return nil, err
    208 		}
    209 		for _, s := range symbols {
    210 			if s.Name == "_stext" {
    211 				// The kernel may use _stext as the mapping start address.
    212 				stextOffset = &s.Value
    213 				break
    214 			}
    215 		}
    216 	}
    217 
    218 	base, err := elfexec.GetBase(&ef.FileHeader, nil, stextOffset, start, limit, offset)
    219 	if err != nil {
    220 		return nil, fmt.Errorf("Could not identify base for %s: %v", name, err)
    221 	}
    222 
    223 	buildID := ""
    224 	if f, err := os.Open(name); err == nil {
    225 		if id, err := elfexec.GetBuildID(f); err == nil {
    226 			buildID = fmt.Sprintf("%x", id)
    227 		}
    228 	}
    229 	if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
    230 		return &fileNM{file: file{b, name, base, buildID}}, nil
    231 	}
    232 	return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
    233 }
    234 
// file holds the state and methods shared by the plugin.ObjFile
// implementations in this file; fileNM and fileAddr2Line embed it.
//
// The field order matters: openELF builds values with positional literals.
type file struct {
	b       *binrep // tool configuration captured when the file was opened
	name    string  // file name as passed to Open
	base    uint64  // set by openELF from elfexec.GetBase; zero otherwise
	buildID string  // hex build ID set by openELF; empty if none was read
}
    242 
// Name returns the file name stored when the file was opened.
func (f *file) Name() string {
	return f.name
}
    246 
// Base returns the base address stored for the file. openELF sets it from
// elfexec.GetBase; files created any other way report zero.
func (f *file) Base() uint64 {
	return f.base
}
    250 
// BuildID returns the hex-encoded build ID stored for the file, or the
// empty string if none was read when the file was opened.
func (f *file) BuildID() string {
	return f.buildID
}
    254 
// SourceLine is the fallback implementation: it ignores addr and returns an
// empty, non-nil frame list with no error. fileNM and fileAddr2Line define
// their own SourceLine, which takes precedence over this one.
func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
	return []plugin.Frame{}, nil
}
    258 
// Close does nothing and always returns nil.
func (f *file) Close() error {
	return nil
}
    262 
    263 func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
    264 	// Get from nm a list of symbols sorted by address.
    265 	cmd := exec.Command(f.b.nm, "-n", f.name)
    266 	out, err := cmd.Output()
    267 	if err != nil {
    268 		return nil, fmt.Errorf("%v: %v", cmd.Args, err)
    269 	}
    270 
    271 	return findSymbols(out, f.name, r, addr)
    272 }
    273 
// fileNM implements the plugin.ObjFile interface, using 'nm' to map
// addresses to symbols (without file/line number information). It is
// faster than fileAddr2Line.
type fileNM struct {
	file
	addr2linernm *addr2LinerNM // created by the first SourceLine call; nil until then
}
    281 
    282 func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
    283 	if f.addr2linernm == nil {
    284 		addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
    285 		if err != nil {
    286 			return nil, err
    287 		}
    288 		f.addr2linernm = addr2liner
    289 	}
    290 	return f.addr2linernm.addrInfo(addr)
    291 }
    292 
// fileAddr2Line implements the plugin.ObjFile interface, using
// 'addr2line' to map addresses to symbols (with file/line number
// information). It can be slow for large binaries with debug
// information.
type fileAddr2Line struct {
	once sync.Once // runs init on the first SourceLine call
	file
	// At most one of the following is set by init: llvmSymbolizer when it
	// could be created, otherwise addr2liner when that could be created.
	addr2liner     *addr2Liner
	llvmSymbolizer *llvmSymbolizer
}
    303 
    304 func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
    305 	f.once.Do(f.init)
    306 	if f.llvmSymbolizer != nil {
    307 		return f.llvmSymbolizer.addrInfo(addr)
    308 	}
    309 	if f.addr2liner != nil {
    310 		return f.addr2liner.addrInfo(addr)
    311 	}
    312 	return nil, fmt.Errorf("could not find local addr2liner")
    313 }
    314 
    315 func (f *fileAddr2Line) init() {
    316 	if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
    317 		f.llvmSymbolizer = llvmSymbolizer
    318 		return
    319 	}
    320 
    321 	if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
    322 		f.addr2liner = addr2liner
    323 
    324 		// When addr2line encounters some gcc compiled binaries, it
    325 		// drops interesting parts of names in anonymous namespaces.
    326 		// Fallback to NM for better function names.
    327 		if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
    328 			f.addr2liner.nm = nm
    329 		}
    330 	}
    331 }
    332 
    333 func (f *fileAddr2Line) Close() error {
    334 	if f.llvmSymbolizer != nil {
    335 		f.llvmSymbolizer.rw.close()
    336 		f.llvmSymbolizer = nil
    337 	}
    338 	if f.addr2liner != nil {
    339 		f.addr2liner.rw.close()
    340 		f.addr2liner = nil
    341 	}
    342 	return nil
    343 }
    344