1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package binutils provides access to the GNU binutils. 16 package binutils 17 18 import ( 19 "debug/elf" 20 "debug/macho" 21 "fmt" 22 "os" 23 "os/exec" 24 "path/filepath" 25 "regexp" 26 "strings" 27 "sync" 28 29 "github.com/google/pprof/internal/elfexec" 30 "github.com/google/pprof/internal/plugin" 31 ) 32 33 // A Binutils implements plugin.ObjTool by invoking the GNU binutils. 34 type Binutils struct { 35 mu sync.Mutex 36 rep *binrep 37 } 38 39 // binrep is an immutable representation for Binutils. It is atomically 40 // replaced on every mutation to provide thread-safe access. 41 type binrep struct { 42 // Commands to invoke. 43 llvmSymbolizer string 44 llvmSymbolizerFound bool 45 addr2line string 46 addr2lineFound bool 47 nm string 48 nmFound bool 49 objdump string 50 objdumpFound bool 51 52 // if fast, perform symbolization using nm (symbol names only), 53 // instead of file-line detail from the slower addr2line. 54 fast bool 55 } 56 57 // get returns the current representation for bu, initializing it if necessary. 58 func (bu *Binutils) get() *binrep { 59 bu.mu.Lock() 60 r := bu.rep 61 if r == nil { 62 r = &binrep{} 63 initTools(r, "") 64 bu.rep = r 65 } 66 bu.mu.Unlock() 67 return r 68 } 69 70 // update modifies the rep for bu via the supplied function. 71 func (bu *Binutils) update(fn func(r *binrep)) { 72 r := &binrep{} 73 bu.mu.Lock() 74 defer bu.mu.Unlock() 75 if bu.rep == nil { 76 initTools(r, "") 77 } else { 78 *r = *bu.rep 79 } 80 fn(r) 81 bu.rep = r 82 } 83 84 // SetFastSymbolization sets a toggle that makes binutils use fast 85 // symbolization (using nm), which is much faster than addr2line but 86 // provides only symbol name information (no file/line). 87 func (bu *Binutils) SetFastSymbolization(fast bool) { 88 bu.update(func(r *binrep) { r.fast = fast }) 89 } 90 91 // SetTools processes the contents of the tools option. It 92 // expects a set of entries separated by commas; each entry is a pair 93 // of the form t:path, where cmd will be used to look only for the 94 // tool named t. If t is not specified, the path is searched for all 95 // tools. 96 func (bu *Binutils) SetTools(config string) { 97 bu.update(func(r *binrep) { initTools(r, config) }) 98 } 99 100 func initTools(b *binrep, config string) { 101 // paths collect paths per tool; Key "" contains the default. 102 paths := make(map[string][]string) 103 for _, t := range strings.Split(config, ",") { 104 name, path := "", t 105 if ct := strings.SplitN(t, ":", 2); len(ct) == 2 { 106 name, path = ct[0], ct[1] 107 } 108 paths[name] = append(paths[name], path) 109 } 110 111 defaultPath := paths[""] 112 b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...)) 113 b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...)) 114 b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...)) 115 b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...)) 116 } 117 118 // findExe looks for an executable command on a set of paths. 119 // If it cannot find it, returns cmd. 120 func findExe(cmd string, paths []string) (string, bool) { 121 for _, p := range paths { 122 cp := filepath.Join(p, cmd) 123 if c, err := exec.LookPath(cp); err == nil { 124 return c, true 125 } 126 } 127 return cmd, false 128 } 129 130 // Disasm returns the assembly instructions for the specified address range 131 // of a binary. 132 func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) { 133 b := bu.get() 134 cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l", 135 fmt.Sprintf("--start-address=%#x", start), 136 fmt.Sprintf("--stop-address=%#x", end), 137 file) 138 out, err := cmd.Output() 139 if err != nil { 140 return nil, fmt.Errorf("%v: %v", cmd.Args, err) 141 } 142 143 return disassemble(out) 144 } 145 146 // Open satisfies the plugin.ObjTool interface. 147 func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) { 148 b := bu.get() 149 150 // Make sure file is a supported executable. 151 // The pprof driver uses Open to sniff the difference 152 // between an executable and a profile. 153 // For now, only ELF is supported. 154 // Could read the first few bytes of the file and 155 // use a table of prefixes if we need to support other 156 // systems at some point. 157 158 if _, err := os.Stat(name); err != nil { 159 // For testing, do not require file name to exist. 160 if strings.Contains(b.addr2line, "testdata/") { 161 return &fileAddr2Line{file: file{b: b, name: name}}, nil 162 } 163 return nil, err 164 } 165 166 if f, err := b.openELF(name, start, limit, offset); err == nil { 167 return f, nil 168 } 169 if f, err := b.openMachO(name, start, limit, offset); err == nil { 170 return f, nil 171 } 172 return nil, fmt.Errorf("unrecognized binary: %s", name) 173 } 174 175 func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) { 176 of, err := macho.Open(name) 177 if err != nil { 178 return nil, fmt.Errorf("Parsing %s: %v", name, err) 179 } 180 defer of.Close() 181 182 if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) { 183 return &fileNM{file: file{b: b, name: name}}, nil 184 } 185 return &fileAddr2Line{file: file{b: b, name: name}}, nil 186 } 187 188 func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) { 189 ef, err := elf.Open(name) 190 if err != nil { 191 return nil, fmt.Errorf("Parsing %s: %v", name, err) 192 } 193 defer ef.Close() 194 195 var stextOffset *uint64 196 var pageAligned = func(addr uint64) bool { return addr%4096 == 0 } 197 if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) { 198 // Reading all Symbols is expensive, and we only rarely need it so 199 // we don't want to do it every time. But if _stext happens to be 200 // page-aligned but isn't the same as Vaddr, we would symbolize 201 // wrong. So if the name the addresses aren't page aligned, or if 202 // the name is "vmlinux" we read _stext. We can be wrong if: (1) 203 // someone passes a kernel path that doesn't contain "vmlinux" AND 204 // (2) _stext is page-aligned AND (3) _stext is not at Vaddr 205 symbols, err := ef.Symbols() 206 if err != nil { 207 return nil, err 208 } 209 for _, s := range symbols { 210 if s.Name == "_stext" { 211 // The kernel may use _stext as the mapping start address. 212 stextOffset = &s.Value 213 break 214 } 215 } 216 } 217 218 base, err := elfexec.GetBase(&ef.FileHeader, nil, stextOffset, start, limit, offset) 219 if err != nil { 220 return nil, fmt.Errorf("Could not identify base for %s: %v", name, err) 221 } 222 223 buildID := "" 224 if f, err := os.Open(name); err == nil { 225 if id, err := elfexec.GetBuildID(f); err == nil { 226 buildID = fmt.Sprintf("%x", id) 227 } 228 } 229 if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) { 230 return &fileNM{file: file{b, name, base, buildID}}, nil 231 } 232 return &fileAddr2Line{file: file{b, name, base, buildID}}, nil 233 } 234 235 // file implements the binutils.ObjFile interface. 236 type file struct { 237 b *binrep 238 name string 239 base uint64 240 buildID string 241 } 242 243 func (f *file) Name() string { 244 return f.name 245 } 246 247 func (f *file) Base() uint64 { 248 return f.base 249 } 250 251 func (f *file) BuildID() string { 252 return f.buildID 253 } 254 255 func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) { 256 return []plugin.Frame{}, nil 257 } 258 259 func (f *file) Close() error { 260 return nil 261 } 262 263 func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) { 264 // Get from nm a list of symbols sorted by address. 265 cmd := exec.Command(f.b.nm, "-n", f.name) 266 out, err := cmd.Output() 267 if err != nil { 268 return nil, fmt.Errorf("%v: %v", cmd.Args, err) 269 } 270 271 return findSymbols(out, f.name, r, addr) 272 } 273 274 // fileNM implements the binutils.ObjFile interface, using 'nm' to map 275 // addresses to symbols (without file/line number information). It is 276 // faster than fileAddr2Line. 277 type fileNM struct { 278 file 279 addr2linernm *addr2LinerNM 280 } 281 282 func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) { 283 if f.addr2linernm == nil { 284 addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base) 285 if err != nil { 286 return nil, err 287 } 288 f.addr2linernm = addr2liner 289 } 290 return f.addr2linernm.addrInfo(addr) 291 } 292 293 // fileAddr2Line implements the binutils.ObjFile interface, using 294 // 'addr2line' to map addresses to symbols (with file/line number 295 // information). It can be slow for large binaries with debug 296 // information. 297 type fileAddr2Line struct { 298 once sync.Once 299 file 300 addr2liner *addr2Liner 301 llvmSymbolizer *llvmSymbolizer 302 } 303 304 func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) { 305 f.once.Do(f.init) 306 if f.llvmSymbolizer != nil { 307 return f.llvmSymbolizer.addrInfo(addr) 308 } 309 if f.addr2liner != nil { 310 return f.addr2liner.addrInfo(addr) 311 } 312 return nil, fmt.Errorf("could not find local addr2liner") 313 } 314 315 func (f *fileAddr2Line) init() { 316 if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil { 317 f.llvmSymbolizer = llvmSymbolizer 318 return 319 } 320 321 if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil { 322 f.addr2liner = addr2liner 323 324 // When addr2line encounters some gcc compiled binaries, it 325 // drops interesting parts of names in anonymous namespaces. 326 // Fallback to NM for better function names. 327 if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil { 328 f.addr2liner.nm = nm 329 } 330 } 331 } 332 333 func (f *fileAddr2Line) Close() error { 334 if f.llvmSymbolizer != nil { 335 f.llvmSymbolizer.rw.close() 336 f.llvmSymbolizer = nil 337 } 338 if f.addr2liner != nil { 339 f.addr2liner.rw.close() 340 f.addr2liner = nil 341 } 342 return nil 343 } 344