Home | History | Annotate | Download | only in gosym
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package gosym implements access to the Go symbol
      6 // and line number tables embedded in Go binaries generated
      7 // by the gc compilers.
      8 package gosym
      9 
     10 // The table format is a variant of the format used in Plan 9's a.out
     11 // format, documented at https://9p.io/magic/man2html/6/a.out.
     12 // The best reference for the differences between the Plan 9 format
     13 // and the Go format is the runtime source, specifically ../../runtime/symtab.c.
     14 
     15 import (
     16 	"bytes"
     17 	"encoding/binary"
     18 	"fmt"
     19 	"strconv"
     20 	"strings"
     21 )
     22 
     23 /*
     24  * Symbols
     25  */
     26 
// A Sym represents a single symbol table entry.
type Sym struct {
	// Value is the symbol's value as decoded from the table. For text
	// symbols NewTable uses it as the function's entry address.
	Value uint64
	// Type is the symbol type letter. NewTable treats 'T', 't', 'L', 'l' as
	// text symbols, 'Z' and 'z' as source path symbols, 'f' as a file name
	// element, 'p' as a parameter, 'a' as a local and 'm' as a frame size.
	// Types at or above 'a' (lower case) are reported as static by Static.
	Type byte
	// Name is the symbol name. For most symbols NewTable rewrites the
	// middle dot (bytes c2 b7) to '.'; for path symbols it is the
	// slash-joined file name.
	Name string
	// GoType is the Go type field decoded from the table entry.
	GoType uint64
	// If this symbol is a function symbol, the corresponding Func
	Func *Func
}
     36 
     37 // Static reports whether this symbol is static (not visible outside its file).
     38 func (s *Sym) Static() bool { return s.Type >= 'a' }
     39 
     40 // PackageName returns the package part of the symbol name,
     41 // or the empty string if there is none.
     42 func (s *Sym) PackageName() string {
     43 	pathend := strings.LastIndex(s.Name, "/")
     44 	if pathend < 0 {
     45 		pathend = 0
     46 	}
     47 
     48 	if i := strings.Index(s.Name[pathend:], "."); i != -1 {
     49 		return s.Name[:pathend+i]
     50 	}
     51 	return ""
     52 }
     53 
     54 // ReceiverName returns the receiver type name of this symbol,
     55 // or the empty string if there is none.
     56 func (s *Sym) ReceiverName() string {
     57 	pathend := strings.LastIndex(s.Name, "/")
     58 	if pathend < 0 {
     59 		pathend = 0
     60 	}
     61 	l := strings.Index(s.Name[pathend:], ".")
     62 	r := strings.LastIndex(s.Name[pathend:], ".")
     63 	if l == -1 || r == -1 || l == r {
     64 		return ""
     65 	}
     66 	return s.Name[pathend+l+1 : pathend+r]
     67 }
     68 
     69 // BaseName returns the symbol name without the package or receiver name.
     70 func (s *Sym) BaseName() string {
     71 	if i := strings.LastIndex(s.Name, "."); i != -1 {
     72 		return s.Name[i+1:]
     73 	}
     74 	return s.Name
     75 }
     76 
// A Func collects information about a single function.
//
// NewTable builds one Func per text symbol it keeps.
type Func struct {
	// Entry is the function's entry address, taken from the text
	// symbol's Value.
	Entry uint64
	// Sym is the text symbol that describes the function.
	*Sym
	// End is set by NewTable to the Value of the text symbol that
	// follows this function in the table.
	End uint64
	// Params holds the 'p' symbols that follow the text symbol.
	Params []*Sym
	// Locals holds the 'a' symbols that follow the text symbol.
	Locals []*Sym
	// FrameSize is taken from the 'm' symbol that follows the text
	// symbol, if any.
	FrameSize int
	// LineTable is the line table used to resolve addresses in this function.
	LineTable *LineTable
	// Obj is the object the function was assigned to by NewTable.
	Obj *Obj
}
     88 
// An Obj represents a collection of functions in a symbol table.
//
// The exact method of division of a binary into separate Objs is an internal detail
// of the symbol table format.
//
// In early versions of Go each source file became a different Obj.
//
// In Go 1 and Go 1.1, each package produced one Obj for all Go sources
// and one Obj per C source file.
//
// In Go 1.2, there is a single Obj for the entire program.
type Obj struct {
	// Funcs is a list of functions in the Obj.
	// NewTable sets it to a sub-slice of Table.Funcs.
	Funcs []Func

	// In Go 1.1 and earlier, Paths is a list of symbols corresponding
	// to the source file names that produced the Obj.
	// NewTable sets it to a sub-slice of Table.Syms holding the run of
	// consecutive path ('Z'/'z') symbols that starts the Obj.
	// In Go 1.2, Paths is nil.
	// Use the keys of Table.Files to obtain a list of source files.
	Paths []Sym // meta
}
    110 
    111 /*
    112  * Symbol tables
    113  */
    114 
// Table represents a Go symbol table. It stores all of the
// symbols decoded from the program and provides methods to translate
// between symbols, names, and addresses.
type Table struct {
	Syms  []Sym           // every symbol decoded from the table, in table order
	Funcs []Func          // one entry per function, in the order NewTable found them
	Files map[string]*Obj // source file name -> Obj; always allocated by NewTable
	Objs  []Obj           // with a Go 1.2 line table NewTable creates exactly one Obj for all functions

	go12line *LineTable // Go 1.2 line number table; nil when the line table is not in Go 1.2 format
}
    126 
// sym is the raw form of a symbol table entry as produced by walksymtab,
// before NewTable converts it into a Sym.
type sym struct {
	value  uint64 // symbol value
	gotype uint64 // Go type field
	typ    byte   // symbol type letter (already mapped to 'A'..'Z' / 'a'..'z')
	name   []byte // raw name bytes; a slice of the table data, not a copy
}
    133 
// Magic prefixes that walksymtab uses to recognise the table encoding.
// Data that starts with none of them is decoded as the original
// big-endian layout.
var (
	// New-format table, little endian. The byte following the prefix is the pointer size.
	littleEndianSymtab = []byte{0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00}
	// New-format table, big endian. The byte following the prefix is the pointer size.
	bigEndianSymtab = []byte{0xFF, 0xFF, 0xFF, 0xFD, 0x00, 0x00, 0x00}
	// Go 1.0 layout stored little endian; an interim format used between Go 1.0 and Go 1.1.
	oldLittleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00}
)
    139 
    140 func walksymtab(data []byte, fn func(sym) error) error {
    141 	if len(data) == 0 { // missing symtab is okay
    142 		return nil
    143 	}
    144 	var order binary.ByteOrder = binary.BigEndian
    145 	newTable := false
    146 	switch {
    147 	case bytes.HasPrefix(data, oldLittleEndianSymtab):
    148 		// Same as Go 1.0, but little endian.
    149 		// Format was used during interim development between Go 1.0 and Go 1.1.
    150 		// Should not be widespread, but easy to support.
    151 		data = data[6:]
    152 		order = binary.LittleEndian
    153 	case bytes.HasPrefix(data, bigEndianSymtab):
    154 		newTable = true
    155 	case bytes.HasPrefix(data, littleEndianSymtab):
    156 		newTable = true
    157 		order = binary.LittleEndian
    158 	}
    159 	var ptrsz int
    160 	if newTable {
    161 		if len(data) < 8 {
    162 			return &DecodingError{len(data), "unexpected EOF", nil}
    163 		}
    164 		ptrsz = int(data[7])
    165 		if ptrsz != 4 && ptrsz != 8 {
    166 			return &DecodingError{7, "invalid pointer size", ptrsz}
    167 		}
    168 		data = data[8:]
    169 	}
    170 	var s sym
    171 	p := data
    172 	for len(p) >= 4 {
    173 		var typ byte
    174 		if newTable {
    175 			// Symbol type, value, Go type.
    176 			typ = p[0] & 0x3F
    177 			wideValue := p[0]&0x40 != 0
    178 			goType := p[0]&0x80 != 0
    179 			if typ < 26 {
    180 				typ += 'A'
    181 			} else {
    182 				typ += 'a' - 26
    183 			}
    184 			s.typ = typ
    185 			p = p[1:]
    186 			if wideValue {
    187 				if len(p) < ptrsz {
    188 					return &DecodingError{len(data), "unexpected EOF", nil}
    189 				}
    190 				// fixed-width value
    191 				if ptrsz == 8 {
    192 					s.value = order.Uint64(p[0:8])
    193 					p = p[8:]
    194 				} else {
    195 					s.value = uint64(order.Uint32(p[0:4]))
    196 					p = p[4:]
    197 				}
    198 			} else {
    199 				// varint value
    200 				s.value = 0
    201 				shift := uint(0)
    202 				for len(p) > 0 && p[0]&0x80 != 0 {
    203 					s.value |= uint64(p[0]&0x7F) << shift
    204 					shift += 7
    205 					p = p[1:]
    206 				}
    207 				if len(p) == 0 {
    208 					return &DecodingError{len(data), "unexpected EOF", nil}
    209 				}
    210 				s.value |= uint64(p[0]) << shift
    211 				p = p[1:]
    212 			}
    213 			if goType {
    214 				if len(p) < ptrsz {
    215 					return &DecodingError{len(data), "unexpected EOF", nil}
    216 				}
    217 				// fixed-width go type
    218 				if ptrsz == 8 {
    219 					s.gotype = order.Uint64(p[0:8])
    220 					p = p[8:]
    221 				} else {
    222 					s.gotype = uint64(order.Uint32(p[0:4]))
    223 					p = p[4:]
    224 				}
    225 			}
    226 		} else {
    227 			// Value, symbol type.
    228 			s.value = uint64(order.Uint32(p[0:4]))
    229 			if len(p) < 5 {
    230 				return &DecodingError{len(data), "unexpected EOF", nil}
    231 			}
    232 			typ = p[4]
    233 			if typ&0x80 == 0 {
    234 				return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
    235 			}
    236 			typ &^= 0x80
    237 			s.typ = typ
    238 			p = p[5:]
    239 		}
    240 
    241 		// Name.
    242 		var i int
    243 		var nnul int
    244 		for i = 0; i < len(p); i++ {
    245 			if p[i] == 0 {
    246 				nnul = 1
    247 				break
    248 			}
    249 		}
    250 		switch typ {
    251 		case 'z', 'Z':
    252 			p = p[i+nnul:]
    253 			for i = 0; i+2 <= len(p); i += 2 {
    254 				if p[i] == 0 && p[i+1] == 0 {
    255 					nnul = 2
    256 					break
    257 				}
    258 			}
    259 		}
    260 		if len(p) < i+nnul {
    261 			return &DecodingError{len(data), "unexpected EOF", nil}
    262 		}
    263 		s.name = p[0:i]
    264 		i += nnul
    265 		p = p[i:]
    266 
    267 		if !newTable {
    268 			if len(p) < 4 {
    269 				return &DecodingError{len(data), "unexpected EOF", nil}
    270 			}
    271 			// Go type.
    272 			s.gotype = uint64(order.Uint32(p[:4]))
    273 			p = p[4:]
    274 		}
    275 		fn(s)
    276 	}
    277 	return nil
    278 }
    279 
// NewTable decodes the Go symbol table in symtab,
// returning an in-memory representation.
//
// pcln is the line table that accompanies the symbol table. When it
// reports itself as a Go 1.2 table it is shared by every function and all
// functions are placed in a single Obj; otherwise each function gets the
// portion of pcln starting at its entry address and Objs are delimited by
// the path symbols found in symtab.
//
// Symbol names are rewritten in place inside symtab (see below), so the
// contents of symtab are modified by this call.
//
// It returns an error if walksymtab reports one while decoding symtab.
func NewTable(symtab []byte, pcln *LineTable) (*Table, error) {
	// First pass: count the symbols so t.Syms can be allocated once.
	var n int
	err := walksymtab(symtab, func(s sym) error {
		n++
		return nil
	})
	if err != nil {
		return nil, err
	}

	var t Table
	if pcln.isGo12() {
		t.go12line = pcln
	}
	// fname maps a file name element code (the value of an 'f' symbol)
	// to the element's text; path symbols are spelled using these codes.
	fname := make(map[uint16]string)
	t.Syms = make([]Sym, 0, n)
	nf := 0 // number of text symbols
	nz := 0 // number of runs of consecutive path symbols
	lasttyp := uint8(0)
	// Second pass: fill in t.Syms.
	err = walksymtab(symtab, func(s sym) error {
		// Extend t.Syms by one within the capacity reserved above, so
		// pointers to earlier elements stay valid.
		n := len(t.Syms)
		t.Syms = t.Syms[0 : n+1]
		ts := &t.Syms[n]
		ts.Type = s.typ
		ts.Value = s.value
		ts.GoType = s.gotype
		switch s.typ {
		default:
			// rewrite name to use . instead of · (c2 b7)
			// The two-byte sequence shrinks to one byte, so the name is
			// compacted in place with w as the write index.
			w := 0
			b := s.name
			for i := 0; i < len(b); i++ {
				if b[i] == 0xc2 && i+1 < len(b) && b[i+1] == 0xb7 {
					i++
					b[i] = '.'
				}
				b[w] = b[i]
				w++
			}
			ts.Name = string(s.name[0:w])
		case 'z', 'Z':
			if lasttyp != 'z' && lasttyp != 'Z' {
				nz++
			}
			// The name is a sequence of big-endian 16-bit element codes;
			// join the corresponding elements with '/'.
			for i := 0; i < len(s.name); i += 2 {
				eltIdx := binary.BigEndian.Uint16(s.name[i : i+2])
				elt, ok := fname[eltIdx]
				if !ok {
					return &DecodingError{-1, "bad filename code", eltIdx}
				}
				if n := len(ts.Name); n > 0 && ts.Name[n-1] != '/' {
					ts.Name += "/"
				}
				ts.Name += elt
			}
		}
		switch s.typ {
		case 'T', 't', 'L', 'l':
			nf++
		case 'f':
			fname[uint16(s.value)] = ts.Name
		}
		lasttyp = s.typ
		return nil
	})
	if err != nil {
		return nil, err
	}

	t.Funcs = make([]Func, 0, nf)
	t.Files = make(map[string]*Obj)

	var obj *Obj
	if t.go12line != nil {
		// Put all functions into one Obj.
		t.Objs = make([]Obj, 1)
		obj = &t.Objs[0]
		t.go12line.go12MapFiles(t.Files, obj)
	} else {
		t.Objs = make([]Obj, 0, nz)
	}

	// Count text symbols and attach frame sizes, parameters, and
	// locals to them. Also, find object file boundaries.
	lastf := 0 // index in t.Funcs of the first function of the current Obj
	for i := 0; i < len(t.Syms); i++ {
		sym := &t.Syms[i]
		switch sym.Type {
		case 'Z', 'z': // path symbol
			if t.go12line != nil {
				// Go 1.2 binaries have the file information elsewhere. Ignore.
				break
			}
			// Finish the current object
			if obj != nil {
				obj.Funcs = t.Funcs[lastf:]
			}
			lastf = len(t.Funcs)

			// Start new object
			n := len(t.Objs)
			t.Objs = t.Objs[0 : n+1]
			obj = &t.Objs[n]

			// Count & copy path symbols
			var end int
			for end = i + 1; end < len(t.Syms); end++ {
				if c := t.Syms[end].Type; c != 'Z' && c != 'z' {
					break
				}
			}
			obj.Paths = t.Syms[i:end]
			i = end - 1 // loop will i++

			// Record file names
			// A named path symbol increases the nesting depth and an
			// unnamed one decreases it; only names seen at depth 0 are
			// recorded in t.Files.
			depth := 0
			for j := range obj.Paths {
				s := &obj.Paths[j]
				if s.Name == "" {
					depth--
				} else {
					if depth == 0 {
						t.Files[s.Name] = obj
					}
					depth++
				}
			}

		case 'T', 't', 'L', 'l': // text symbol
			// The previous function ends where this symbol begins.
			if n := len(t.Funcs); n > 0 {
				t.Funcs[n-1].End = sym.Value
			}
			// etext only marks the end of the text; it is not a function.
			if sym.Name == "runtime.etext" || sym.Name == "etext" {
				continue
			}

			// Count parameter and local (auto) syms
			var np, na int
			var end int
		countloop:
			for end = i + 1; end < len(t.Syms); end++ {
				switch t.Syms[end].Type {
				case 'T', 't', 'L', 'l', 'Z', 'z':
					break countloop
				case 'p':
					np++
				case 'a':
					na++
				}
			}

			// Fill in the function symbol
			n := len(t.Funcs)
			t.Funcs = t.Funcs[0 : n+1]
			fn := &t.Funcs[n]
			sym.Func = fn
			fn.Params = make([]*Sym, 0, np)
			fn.Locals = make([]*Sym, 0, na)
			fn.Sym = sym
			fn.Entry = sym.Value
			fn.Obj = obj
			if t.go12line != nil {
				// All functions share the same line table.
				// It knows how to narrow down to a specific
				// function quickly.
				fn.LineTable = t.go12line
			} else if pcln != nil {
				// Each function gets the table sliced at its entry, and
				// the next function continues from that slice.
				fn.LineTable = pcln.slice(fn.Entry)
				pcln = fn.LineTable
			}
			// Attach the frame size, parameters and locals that follow
			// the text symbol, up to the next text or path symbol.
			for j := i; j < end; j++ {
				s := &t.Syms[j]
				switch s.Type {
				case 'm':
					fn.FrameSize = int(s.Value)
				case 'p':
					n := len(fn.Params)
					fn.Params = fn.Params[0 : n+1]
					fn.Params[n] = s
				case 'a':
					n := len(fn.Locals)
					fn.Locals = fn.Locals[0 : n+1]
					fn.Locals[n] = s
				}
			}
			i = end - 1 // loop will i++
		}
	}

	// A Go 1.2 table with no text symbols at all: take the function
	// list from the line table instead.
	if t.go12line != nil && nf == 0 {
		t.Funcs = t.go12line.go12Funcs()
	}
	// Finish the last object.
	if obj != nil {
		obj.Funcs = t.Funcs[lastf:]
	}
	return &t, nil
}
    479 
    480 // PCToFunc returns the function containing the program counter pc,
    481 // or nil if there is no such function.
    482 func (t *Table) PCToFunc(pc uint64) *Func {
    483 	funcs := t.Funcs
    484 	for len(funcs) > 0 {
    485 		m := len(funcs) / 2
    486 		fn := &funcs[m]
    487 		switch {
    488 		case pc < fn.Entry:
    489 			funcs = funcs[0:m]
    490 		case fn.Entry <= pc && pc < fn.End:
    491 			return fn
    492 		default:
    493 			funcs = funcs[m+1:]
    494 		}
    495 	}
    496 	return nil
    497 }
    498 
    499 // PCToLine looks up line number information for a program counter.
    500 // If there is no information, it returns fn == nil.
    501 func (t *Table) PCToLine(pc uint64) (file string, line int, fn *Func) {
    502 	if fn = t.PCToFunc(pc); fn == nil {
    503 		return
    504 	}
    505 	if t.go12line != nil {
    506 		file = t.go12line.go12PCToFile(pc)
    507 		line = t.go12line.go12PCToLine(pc)
    508 	} else {
    509 		file, line = fn.Obj.lineFromAline(fn.LineTable.PCToLine(pc))
    510 	}
    511 	return
    512 }
    513 
    514 // LineToPC looks up the first program counter on the given line in
    515 // the named file. It returns UnknownPathError or UnknownLineError if
    516 // there is an error looking up this line.
    517 func (t *Table) LineToPC(file string, line int) (pc uint64, fn *Func, err error) {
    518 	obj, ok := t.Files[file]
    519 	if !ok {
    520 		return 0, nil, UnknownFileError(file)
    521 	}
    522 
    523 	if t.go12line != nil {
    524 		pc := t.go12line.go12LineToPC(file, line)
    525 		if pc == 0 {
    526 			return 0, nil, &UnknownLineError{file, line}
    527 		}
    528 		return pc, t.PCToFunc(pc), nil
    529 	}
    530 
    531 	abs, err := obj.alineFromLine(file, line)
    532 	if err != nil {
    533 		return
    534 	}
    535 	for i := range obj.Funcs {
    536 		f := &obj.Funcs[i]
    537 		pc := f.LineTable.LineToPC(abs, f.End)
    538 		if pc != 0 {
    539 			return pc, f, nil
    540 		}
    541 	}
    542 	return 0, nil, &UnknownLineError{file, line}
    543 }
    544 
    545 // LookupSym returns the text, data, or bss symbol with the given name,
    546 // or nil if no such symbol is found.
    547 func (t *Table) LookupSym(name string) *Sym {
    548 	// TODO(austin) Maybe make a map
    549 	for i := range t.Syms {
    550 		s := &t.Syms[i]
    551 		switch s.Type {
    552 		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
    553 			if s.Name == name {
    554 				return s
    555 			}
    556 		}
    557 	}
    558 	return nil
    559 }
    560 
    561 // LookupFunc returns the text, data, or bss symbol with the given name,
    562 // or nil if no such symbol is found.
    563 func (t *Table) LookupFunc(name string) *Func {
    564 	for i := range t.Funcs {
    565 		f := &t.Funcs[i]
    566 		if f.Sym.Name == name {
    567 			return f
    568 		}
    569 	}
    570 	return nil
    571 }
    572 
    573 // SymByAddr returns the text, data, or bss symbol starting at the given address.
    574 func (t *Table) SymByAddr(addr uint64) *Sym {
    575 	for i := range t.Syms {
    576 		s := &t.Syms[i]
    577 		switch s.Type {
    578 		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
    579 			if s.Value == addr {
    580 				return s
    581 			}
    582 		}
    583 	}
    584 	return nil
    585 }
    586 
    587 /*
    588  * Object files
    589  */
    590 
    591 // This is legacy code for Go 1.1 and earlier, which used the
    592 // Plan 9 format for pc-line tables. This code was never quite
    593 // correct. It's probably very close, and it's usually correct, but
    594 // we never quite found all the corner cases.
    595 //
    596 // Go 1.2 and later use a simpler format, documented at golang.org/s/go12symtab.
    597 
    598 func (o *Obj) lineFromAline(aline int) (string, int) {
    599 	type stackEnt struct {
    600 		path   string
    601 		start  int
    602 		offset int
    603 		prev   *stackEnt
    604 	}
    605 
    606 	noPath := &stackEnt{"", 0, 0, nil}
    607 	tos := noPath
    608 
    609 pathloop:
    610 	for _, s := range o.Paths {
    611 		val := int(s.Value)
    612 		switch {
    613 		case val > aline:
    614 			break pathloop
    615 
    616 		case val == 1:
    617 			// Start a new stack
    618 			tos = &stackEnt{s.Name, val, 0, noPath}
    619 
    620 		case s.Name == "":
    621 			// Pop
    622 			if tos == noPath {
    623 				return "<malformed symbol table>", 0
    624 			}
    625 			tos.prev.offset += val - tos.start
    626 			tos = tos.prev
    627 
    628 		default:
    629 			// Push
    630 			tos = &stackEnt{s.Name, val, 0, tos}
    631 		}
    632 	}
    633 
    634 	if tos == noPath {
    635 		return "", 0
    636 	}
    637 	return tos.path, aline - tos.start - tos.offset + 1
    638 }
    639 
    640 func (o *Obj) alineFromLine(path string, line int) (int, error) {
    641 	if line < 1 {
    642 		return 0, &UnknownLineError{path, line}
    643 	}
    644 
    645 	for i, s := range o.Paths {
    646 		// Find this path
    647 		if s.Name != path {
    648 			continue
    649 		}
    650 
    651 		// Find this line at this stack level
    652 		depth := 0
    653 		var incstart int
    654 		line += int(s.Value)
    655 	pathloop:
    656 		for _, s := range o.Paths[i:] {
    657 			val := int(s.Value)
    658 			switch {
    659 			case depth == 1 && val >= line:
    660 				return line - 1, nil
    661 
    662 			case s.Name == "":
    663 				depth--
    664 				if depth == 0 {
    665 					break pathloop
    666 				} else if depth == 1 {
    667 					line += val - incstart
    668 				}
    669 
    670 			default:
    671 				if depth == 1 {
    672 					incstart = val
    673 				}
    674 				depth++
    675 			}
    676 		}
    677 		return 0, &UnknownLineError{path, line}
    678 	}
    679 	return 0, UnknownFileError(path)
    680 }
    681 
    682 /*
    683  * Errors
    684  */
    685 
    686 // UnknownFileError represents a failure to find the specific file in
    687 // the symbol table.
    688 type UnknownFileError string
    689 
    690 func (e UnknownFileError) Error() string { return "unknown file: " + string(e) }
    691 
    692 // UnknownLineError represents a failure to map a line to a program
    693 // counter, either because the line is beyond the bounds of the file
    694 // or because there is no code on the given line.
    695 type UnknownLineError struct {
    696 	File string
    697 	Line int
    698 }
    699 
    700 func (e *UnknownLineError) Error() string {
    701 	return "no code at " + e.File + ":" + strconv.Itoa(e.Line)
    702 }
    703 
    704 // DecodingError represents an error during the decoding of
    705 // the symbol table.
    706 type DecodingError struct {
    707 	off int
    708 	msg string
    709 	val interface{}
    710 }
    711 
    712 func (e *DecodingError) Error() string {
    713 	msg := e.msg
    714 	if e.val != nil {
    715 		msg += fmt.Sprintf(" '%v'", e.val)
    716 	}
    717 	msg += fmt.Sprintf(" at byte %#x", e.off)
    718 	return msg
    719 }
    720