Home | History | Annotate | Download | only in gosym
      1 // Copyright 2009 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package gosym implements access to the Go symbol
      6 // and line number tables embedded in Go binaries generated
      7 // by the gc compilers.
      8 package gosym
      9 
     10 // The table format is a variant of the format used in Plan 9's a.out
     11 // format, documented at http://plan9.bell-labs.com/magic/man2html/6/a.out.
     12 // The best reference for the differences between the Plan 9 format
     13 // and the Go format is the runtime source, specifically ../../runtime/symtab.c.
     14 
     15 import (
     16 	"bytes"
     17 	"encoding/binary"
     18 	"fmt"
     19 	"strconv"
     20 	"strings"
     21 )
     22 
     23 /*
     24  * Symbols
     25  */
     26 
     27 // A Sym represents a single symbol table entry.
     28 type Sym struct {
     29 	Value  uint64 // entry's value field; meaning depends on Type (NewTable uses it as the entry address of text symbols)
     30 	Type   byte // symbol type letter; Static reports types >= 'a' as static
     31 	Name   string // symbol name; NewTable rewrites the middle dot (bytes c2 b7) to '.' and expands path symbols into a slash-joined name
     32 	GoType uint64 // entry's Go type field, copied unchanged from the table
     33 	// If this symbol is a function symbol, the corresponding Func.
     33 	// NewTable sets it only for text symbols that it turns into a Func.
     34 	Func *Func
     35 }
     36 
     37 // Static reports whether this symbol is static (not visible outside its file).
     38 func (s *Sym) Static() bool { return s.Type >= 'a' }
     39 
     40 // PackageName returns the package part of the symbol name,
     41 // or the empty string if there is none.
     42 func (s *Sym) PackageName() string {
     43 	if i := strings.Index(s.Name, "."); i != -1 {
     44 		return s.Name[0:i]
     45 	}
     46 	return ""
     47 }
     48 
     49 // ReceiverName returns the receiver type name of this symbol,
     50 // or the empty string if there is none.
     51 func (s *Sym) ReceiverName() string {
     52 	l := strings.Index(s.Name, ".")
     53 	r := strings.LastIndex(s.Name, ".")
     54 	if l == -1 || r == -1 || l == r {
     55 		return ""
     56 	}
     57 	return s.Name[l+1 : r]
     58 }
     59 
     60 // BaseName returns the symbol name without the package or receiver name.
     61 func (s *Sym) BaseName() string {
     62 	if i := strings.LastIndex(s.Name, "."); i != -1 {
     63 		return s.Name[i+1:]
     64 	}
     65 	return s.Name
     66 }
     67 
     68 // A Func collects information about a single function.
     68 // The field notes below describe how NewTable fills a Func from the symbol table.
     69 type Func struct {
     70 	Entry uint64 // Value of the function's text symbol
     71 	*Sym // the function's text symbol
     72 	End       uint64 // Value of the next text symbol in the table; PCToFunc treats [Entry, End) as the function's range
     73 	Params    []*Sym // the 'p' symbols that follow the text symbol
     74 	Locals    []*Sym // the 'a' symbols that follow the text symbol
     75 	FrameSize int // Value of the 'm' symbol that follows the text symbol, if any
     76 	LineTable *LineTable // the shared Go 1.2 line table, or the slice of the old-format table starting at Entry
     77 	Obj       *Obj // the Obj that was current when the function was recorded
     78 }
     79 
     80 // An Obj represents a collection of functions in a symbol table.
     81 //
     82 // The exact method of division of a binary into separate Objs is an internal detail
     83 // of the symbol table format.
     84 //
     85 // In early versions of Go each source file became a different Obj.
     86 //
     87 // In Go 1 and Go 1.1, each package produced one Obj for all Go sources
     88 // and one Obj per C source file.
     89 //
     90 // In Go 1.2, there is a single Obj for the entire program.
     91 type Obj struct {
     92 	// Funcs is a list of functions in the Obj.
     93 	Funcs []Func // NewTable sets this to a sub-slice of Table.Funcs
     94 
     95 	// In Go 1.1 and earlier, Paths is a list of symbols corresponding
     96 	// to the source file names that produced the Obj.
     97 	// In Go 1.2, Paths is nil.
     98 	// Use the keys of Table.Files to obtain a list of source files.
     99 	Paths []Sym // meta; for old-format tables, a run of consecutive 'z'/'Z' symbols sliced out of Table.Syms
    100 }
    101 
    102 /*
    103  * Symbol tables
    104  */
    105 
    106 // Table represents a Go symbol table.  It stores all of the
    107 // symbols decoded from the program and provides methods to translate
    108 // between symbols, names, and addresses.
    109 type Table struct {
    110 	Syms  []Sym // every decoded symbol, in table order
    111 	Funcs []Func // one entry per function; PCToFunc binary-searches this slice by address
    112 	Files map[string]*Obj // source file name -> Obj; always allocated by NewTable (for Go 1.2 tables it is passed to go12MapFiles)
    113 	Objs  []Obj           // a single Obj for Go 1.2 tables; otherwise one Obj per run of path symbols
    114 
    115 	go12line *LineTable // Go 1.2 line number table
    116 }
    117 
    118 type sym struct { // one decoded table entry, as handed by walksymtab to its callback
    119 	value  uint64 // value field of the entry
    120 	gotype uint64 // Go type field of the entry
    121 	typ    byte // symbol type letter
    122 	name   []byte // raw name bytes; a sub-slice of the table data, not a copy
    123 }
    124 
    125 var ( // magic prefixes walksymtab uses to recognize the table layout
    126 	littleEndianSymtab    = []byte{0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00} // newer layout, little-endian; byte 7 of the table then holds the pointer size
    127 	bigEndianSymtab       = []byte{0xFF, 0xFF, 0xFF, 0xFD, 0x00, 0x00, 0x00} // newer layout, big-endian; byte 7 of the table then holds the pointer size
    128 	oldLittleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00} // little-endian variant of the Go 1.0 layout; walksymtab skips this 6-byte header
    129 )
    130 
    131 func walksymtab(data []byte, fn func(sym) error) error {
    132 	if len(data) == 0 { // missing symtab is okay
    133 		return nil
    134 	}
    135 	var order binary.ByteOrder = binary.BigEndian
    136 	newTable := false
    137 	switch {
    138 	case bytes.HasPrefix(data, oldLittleEndianSymtab):
    139 		// Same as Go 1.0, but little endian.
    140 		// Format was used during interim development between Go 1.0 and Go 1.1.
    141 		// Should not be widespread, but easy to support.
    142 		data = data[6:]
    143 		order = binary.LittleEndian
    144 	case bytes.HasPrefix(data, bigEndianSymtab):
    145 		newTable = true
    146 	case bytes.HasPrefix(data, littleEndianSymtab):
    147 		newTable = true
    148 		order = binary.LittleEndian
    149 	}
    150 	var ptrsz int
    151 	if newTable {
    152 		if len(data) < 8 {
    153 			return &DecodingError{len(data), "unexpected EOF", nil}
    154 		}
    155 		ptrsz = int(data[7])
    156 		if ptrsz != 4 && ptrsz != 8 {
    157 			return &DecodingError{7, "invalid pointer size", ptrsz}
    158 		}
    159 		data = data[8:]
    160 	}
    161 	var s sym
    162 	p := data
    163 	for len(p) >= 4 {
    164 		var typ byte
    165 		if newTable {
    166 			// Symbol type, value, Go type.
    167 			typ = p[0] & 0x3F
    168 			wideValue := p[0]&0x40 != 0
    169 			goType := p[0]&0x80 != 0
    170 			if typ < 26 {
    171 				typ += 'A'
    172 			} else {
    173 				typ += 'a' - 26
    174 			}
    175 			s.typ = typ
    176 			p = p[1:]
    177 			if wideValue {
    178 				if len(p) < ptrsz {
    179 					return &DecodingError{len(data), "unexpected EOF", nil}
    180 				}
    181 				// fixed-width value
    182 				if ptrsz == 8 {
    183 					s.value = order.Uint64(p[0:8])
    184 					p = p[8:]
    185 				} else {
    186 					s.value = uint64(order.Uint32(p[0:4]))
    187 					p = p[4:]
    188 				}
    189 			} else {
    190 				// varint value
    191 				s.value = 0
    192 				shift := uint(0)
    193 				for len(p) > 0 && p[0]&0x80 != 0 {
    194 					s.value |= uint64(p[0]&0x7F) << shift
    195 					shift += 7
    196 					p = p[1:]
    197 				}
    198 				if len(p) == 0 {
    199 					return &DecodingError{len(data), "unexpected EOF", nil}
    200 				}
    201 				s.value |= uint64(p[0]) << shift
    202 				p = p[1:]
    203 			}
    204 			if goType {
    205 				if len(p) < ptrsz {
    206 					return &DecodingError{len(data), "unexpected EOF", nil}
    207 				}
    208 				// fixed-width go type
    209 				if ptrsz == 8 {
    210 					s.gotype = order.Uint64(p[0:8])
    211 					p = p[8:]
    212 				} else {
    213 					s.gotype = uint64(order.Uint32(p[0:4]))
    214 					p = p[4:]
    215 				}
    216 			}
    217 		} else {
    218 			// Value, symbol type.
    219 			s.value = uint64(order.Uint32(p[0:4]))
    220 			if len(p) < 5 {
    221 				return &DecodingError{len(data), "unexpected EOF", nil}
    222 			}
    223 			typ = p[4]
    224 			if typ&0x80 == 0 {
    225 				return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
    226 			}
    227 			typ &^= 0x80
    228 			s.typ = typ
    229 			p = p[5:]
    230 		}
    231 
    232 		// Name.
    233 		var i int
    234 		var nnul int
    235 		for i = 0; i < len(p); i++ {
    236 			if p[i] == 0 {
    237 				nnul = 1
    238 				break
    239 			}
    240 		}
    241 		switch typ {
    242 		case 'z', 'Z':
    243 			p = p[i+nnul:]
    244 			for i = 0; i+2 <= len(p); i += 2 {
    245 				if p[i] == 0 && p[i+1] == 0 {
    246 					nnul = 2
    247 					break
    248 				}
    249 			}
    250 		}
    251 		if len(p) < i+nnul {
    252 			return &DecodingError{len(data), "unexpected EOF", nil}
    253 		}
    254 		s.name = p[0:i]
    255 		i += nnul
    256 		p = p[i:]
    257 
    258 		if !newTable {
    259 			if len(p) < 4 {
    260 				return &DecodingError{len(data), "unexpected EOF", nil}
    261 			}
    262 			// Go type.
    263 			s.gotype = uint64(order.Uint32(p[:4]))
    264 			p = p[4:]
    265 		}
    266 		fn(s)
    267 	}
    268 	return nil
    269 }
    270 
    271 // NewTable decodes the Go symbol table in data,
    272 // returning an in-memory representation.
    273 func NewTable(symtab []byte, pcln *LineTable) (*Table, error) {
    274 	var n int
    275 	err := walksymtab(symtab, func(s sym) error {
    276 		n++
    277 		return nil
    278 	})
    279 	if err != nil {
    280 		return nil, err
    281 	}
    282 
    283 	var t Table
    284 	if pcln.isGo12() {
    285 		t.go12line = pcln
    286 	}
    287 	fname := make(map[uint16]string)
    288 	t.Syms = make([]Sym, 0, n)
    289 	nf := 0
    290 	nz := 0
    291 	lasttyp := uint8(0)
    292 	err = walksymtab(symtab, func(s sym) error {
    293 		n := len(t.Syms)
    294 		t.Syms = t.Syms[0 : n+1]
    295 		ts := &t.Syms[n]
    296 		ts.Type = s.typ
    297 		ts.Value = uint64(s.value)
    298 		ts.GoType = uint64(s.gotype)
    299 		switch s.typ {
    300 		default:
    301 			// rewrite name to use . instead of  (c2 b7)
    302 			w := 0
    303 			b := s.name
    304 			for i := 0; i < len(b); i++ {
    305 				if b[i] == 0xc2 && i+1 < len(b) && b[i+1] == 0xb7 {
    306 					i++
    307 					b[i] = '.'
    308 				}
    309 				b[w] = b[i]
    310 				w++
    311 			}
    312 			ts.Name = string(s.name[0:w])
    313 		case 'z', 'Z':
    314 			if lasttyp != 'z' && lasttyp != 'Z' {
    315 				nz++
    316 			}
    317 			for i := 0; i < len(s.name); i += 2 {
    318 				eltIdx := binary.BigEndian.Uint16(s.name[i : i+2])
    319 				elt, ok := fname[eltIdx]
    320 				if !ok {
    321 					return &DecodingError{-1, "bad filename code", eltIdx}
    322 				}
    323 				if n := len(ts.Name); n > 0 && ts.Name[n-1] != '/' {
    324 					ts.Name += "/"
    325 				}
    326 				ts.Name += elt
    327 			}
    328 		}
    329 		switch s.typ {
    330 		case 'T', 't', 'L', 'l':
    331 			nf++
    332 		case 'f':
    333 			fname[uint16(s.value)] = ts.Name
    334 		}
    335 		lasttyp = s.typ
    336 		return nil
    337 	})
    338 	if err != nil {
    339 		return nil, err
    340 	}
    341 
    342 	t.Funcs = make([]Func, 0, nf)
    343 	t.Files = make(map[string]*Obj)
    344 
    345 	var obj *Obj
    346 	if t.go12line != nil {
    347 		// Put all functions into one Obj.
    348 		t.Objs = make([]Obj, 1)
    349 		obj = &t.Objs[0]
    350 		t.go12line.go12MapFiles(t.Files, obj)
    351 	} else {
    352 		t.Objs = make([]Obj, 0, nz)
    353 	}
    354 
    355 	// Count text symbols and attach frame sizes, parameters, and
    356 	// locals to them.  Also, find object file boundaries.
    357 	lastf := 0
    358 	for i := 0; i < len(t.Syms); i++ {
    359 		sym := &t.Syms[i]
    360 		switch sym.Type {
    361 		case 'Z', 'z': // path symbol
    362 			if t.go12line != nil {
    363 				// Go 1.2 binaries have the file information elsewhere. Ignore.
    364 				break
    365 			}
    366 			// Finish the current object
    367 			if obj != nil {
    368 				obj.Funcs = t.Funcs[lastf:]
    369 			}
    370 			lastf = len(t.Funcs)
    371 
    372 			// Start new object
    373 			n := len(t.Objs)
    374 			t.Objs = t.Objs[0 : n+1]
    375 			obj = &t.Objs[n]
    376 
    377 			// Count & copy path symbols
    378 			var end int
    379 			for end = i + 1; end < len(t.Syms); end++ {
    380 				if c := t.Syms[end].Type; c != 'Z' && c != 'z' {
    381 					break
    382 				}
    383 			}
    384 			obj.Paths = t.Syms[i:end]
    385 			i = end - 1 // loop will i++
    386 
    387 			// Record file names
    388 			depth := 0
    389 			for j := range obj.Paths {
    390 				s := &obj.Paths[j]
    391 				if s.Name == "" {
    392 					depth--
    393 				} else {
    394 					if depth == 0 {
    395 						t.Files[s.Name] = obj
    396 					}
    397 					depth++
    398 				}
    399 			}
    400 
    401 		case 'T', 't', 'L', 'l': // text symbol
    402 			if n := len(t.Funcs); n > 0 {
    403 				t.Funcs[n-1].End = sym.Value
    404 			}
    405 			if sym.Name == "runtime.etext" || sym.Name == "etext" {
    406 				continue
    407 			}
    408 
    409 			// Count parameter and local (auto) syms
    410 			var np, na int
    411 			var end int
    412 		countloop:
    413 			for end = i + 1; end < len(t.Syms); end++ {
    414 				switch t.Syms[end].Type {
    415 				case 'T', 't', 'L', 'l', 'Z', 'z':
    416 					break countloop
    417 				case 'p':
    418 					np++
    419 				case 'a':
    420 					na++
    421 				}
    422 			}
    423 
    424 			// Fill in the function symbol
    425 			n := len(t.Funcs)
    426 			t.Funcs = t.Funcs[0 : n+1]
    427 			fn := &t.Funcs[n]
    428 			sym.Func = fn
    429 			fn.Params = make([]*Sym, 0, np)
    430 			fn.Locals = make([]*Sym, 0, na)
    431 			fn.Sym = sym
    432 			fn.Entry = sym.Value
    433 			fn.Obj = obj
    434 			if t.go12line != nil {
    435 				// All functions share the same line table.
    436 				// It knows how to narrow down to a specific
    437 				// function quickly.
    438 				fn.LineTable = t.go12line
    439 			} else if pcln != nil {
    440 				fn.LineTable = pcln.slice(fn.Entry)
    441 				pcln = fn.LineTable
    442 			}
    443 			for j := i; j < end; j++ {
    444 				s := &t.Syms[j]
    445 				switch s.Type {
    446 				case 'm':
    447 					fn.FrameSize = int(s.Value)
    448 				case 'p':
    449 					n := len(fn.Params)
    450 					fn.Params = fn.Params[0 : n+1]
    451 					fn.Params[n] = s
    452 				case 'a':
    453 					n := len(fn.Locals)
    454 					fn.Locals = fn.Locals[0 : n+1]
    455 					fn.Locals[n] = s
    456 				}
    457 			}
    458 			i = end - 1 // loop will i++
    459 		}
    460 	}
    461 
    462 	if t.go12line != nil && nf == 0 {
    463 		t.Funcs = t.go12line.go12Funcs()
    464 	}
    465 	if obj != nil {
    466 		obj.Funcs = t.Funcs[lastf:]
    467 	}
    468 	return &t, nil
    469 }
    470 
    471 // PCToFunc returns the function containing the program counter pc,
    472 // or nil if there is no such function.
    473 func (t *Table) PCToFunc(pc uint64) *Func {
    474 	funcs := t.Funcs
    475 	for len(funcs) > 0 {
    476 		m := len(funcs) / 2
    477 		fn := &funcs[m]
    478 		switch {
    479 		case pc < fn.Entry:
    480 			funcs = funcs[0:m]
    481 		case fn.Entry <= pc && pc < fn.End:
    482 			return fn
    483 		default:
    484 			funcs = funcs[m+1:]
    485 		}
    486 	}
    487 	return nil
    488 }
    489 
    490 // PCToLine looks up line number information for a program counter.
    491 // If there is no information, it returns fn == nil.
    492 func (t *Table) PCToLine(pc uint64) (file string, line int, fn *Func) {
    493 	if fn = t.PCToFunc(pc); fn == nil {
    494 		return
    495 	}
    496 	if t.go12line != nil {
    497 		file = t.go12line.go12PCToFile(pc)
    498 		line = t.go12line.go12PCToLine(pc)
    499 	} else {
    500 		file, line = fn.Obj.lineFromAline(fn.LineTable.PCToLine(pc))
    501 	}
    502 	return
    503 }
    504 
    505 // LineToPC looks up the first program counter on the given line in
    506 // the named file.  It returns UnknownPathError or UnknownLineError if
    507 // there is an error looking up this line.
    508 func (t *Table) LineToPC(file string, line int) (pc uint64, fn *Func, err error) {
    509 	obj, ok := t.Files[file]
    510 	if !ok {
    511 		return 0, nil, UnknownFileError(file)
    512 	}
    513 
    514 	if t.go12line != nil {
    515 		pc := t.go12line.go12LineToPC(file, line)
    516 		if pc == 0 {
    517 			return 0, nil, &UnknownLineError{file, line}
    518 		}
    519 		return pc, t.PCToFunc(pc), nil
    520 	}
    521 
    522 	abs, err := obj.alineFromLine(file, line)
    523 	if err != nil {
    524 		return
    525 	}
    526 	for i := range obj.Funcs {
    527 		f := &obj.Funcs[i]
    528 		pc := f.LineTable.LineToPC(abs, f.End)
    529 		if pc != 0 {
    530 			return pc, f, nil
    531 		}
    532 	}
    533 	return 0, nil, &UnknownLineError{file, line}
    534 }
    535 
    536 // LookupSym returns the text, data, or bss symbol with the given name,
    537 // or nil if no such symbol is found.
    538 func (t *Table) LookupSym(name string) *Sym {
    539 	// TODO(austin) Maybe make a map
    540 	for i := range t.Syms {
    541 		s := &t.Syms[i]
    542 		switch s.Type {
    543 		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
    544 			if s.Name == name {
    545 				return s
    546 			}
    547 		}
    548 	}
    549 	return nil
    550 }
    551 
    552 // LookupFunc returns the text, data, or bss symbol with the given name,
    553 // or nil if no such symbol is found.
    554 func (t *Table) LookupFunc(name string) *Func {
    555 	for i := range t.Funcs {
    556 		f := &t.Funcs[i]
    557 		if f.Sym.Name == name {
    558 			return f
    559 		}
    560 	}
    561 	return nil
    562 }
    563 
    564 // SymByAddr returns the text, data, or bss symbol starting at the given address.
    565 func (t *Table) SymByAddr(addr uint64) *Sym {
    566 	for i := range t.Syms {
    567 		s := &t.Syms[i]
    568 		switch s.Type {
    569 		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
    570 			if s.Value == addr {
    571 				return s
    572 			}
    573 		}
    574 	}
    575 	return nil
    576 }
    577 
    578 /*
    579  * Object files
    580  */
    581 
    582 // This is legacy code for Go 1.1 and earlier, which used the
    583 // Plan 9 format for pc-line tables. This code was never quite
    584 // correct. It's probably very close, and it's usually correct, but
    585 // we never quite found all the corner cases.
    586 //
    587 // Go 1.2 and later use a simpler format, documented at golang.org/s/go12symtab.
    588 
    589 func (o *Obj) lineFromAline(aline int) (string, int) {
    590 	type stackEnt struct {
    591 		path   string
    592 		start  int
    593 		offset int
    594 		prev   *stackEnt
    595 	}
    596 
    597 	noPath := &stackEnt{"", 0, 0, nil}
    598 	tos := noPath
    599 
    600 pathloop:
    601 	for _, s := range o.Paths {
    602 		val := int(s.Value)
    603 		switch {
    604 		case val > aline:
    605 			break pathloop
    606 
    607 		case val == 1:
    608 			// Start a new stack
    609 			tos = &stackEnt{s.Name, val, 0, noPath}
    610 
    611 		case s.Name == "":
    612 			// Pop
    613 			if tos == noPath {
    614 				return "<malformed symbol table>", 0
    615 			}
    616 			tos.prev.offset += val - tos.start
    617 			tos = tos.prev
    618 
    619 		default:
    620 			// Push
    621 			tos = &stackEnt{s.Name, val, 0, tos}
    622 		}
    623 	}
    624 
    625 	if tos == noPath {
    626 		return "", 0
    627 	}
    628 	return tos.path, aline - tos.start - tos.offset + 1
    629 }
    630 
    631 func (o *Obj) alineFromLine(path string, line int) (int, error) {
    632 	if line < 1 {
    633 		return 0, &UnknownLineError{path, line}
    634 	}
    635 
    636 	for i, s := range o.Paths {
    637 		// Find this path
    638 		if s.Name != path {
    639 			continue
    640 		}
    641 
    642 		// Find this line at this stack level
    643 		depth := 0
    644 		var incstart int
    645 		line += int(s.Value)
    646 	pathloop:
    647 		for _, s := range o.Paths[i:] {
    648 			val := int(s.Value)
    649 			switch {
    650 			case depth == 1 && val >= line:
    651 				return line - 1, nil
    652 
    653 			case s.Name == "":
    654 				depth--
    655 				if depth == 0 {
    656 					break pathloop
    657 				} else if depth == 1 {
    658 					line += val - incstart
    659 				}
    660 
    661 			default:
    662 				if depth == 1 {
    663 					incstart = val
    664 				}
    665 				depth++
    666 			}
    667 		}
    668 		return 0, &UnknownLineError{path, line}
    669 	}
    670 	return 0, UnknownFileError(path)
    671 }
    672 
    673 /*
    674  * Errors
    675  */
    676 
    677 // UnknownFileError represents a failure to find the specific file in
    678 // the symbol table.
    679 type UnknownFileError string
    680 
    681 func (e UnknownFileError) Error() string { return "unknown file: " + string(e) }
    682 
    683 // UnknownLineError represents a failure to map a line to a program
    684 // counter, either because the line is beyond the bounds of the file
    685 // or because there is no code on the given line.
    686 type UnknownLineError struct {
    687 	File string
    688 	Line int
    689 }
    690 
    691 func (e *UnknownLineError) Error() string {
    692 	return "no code at " + e.File + ":" + strconv.Itoa(e.Line)
    693 }
    694 
    695 // DecodingError represents an error during the decoding of
    696 // the symbol table.
    697 type DecodingError struct {
    698 	off int
    699 	msg string
    700 	val interface{}
    701 }
    702 
    703 func (e *DecodingError) Error() string {
    704 	msg := e.msg
    705 	if e.val != nil {
    706 		msg += fmt.Sprintf(" '%v'", e.val)
    707 	}
    708 	msg += fmt.Sprintf(" at byte %#x", e.off)
    709 	return msg
    710 }
    711