Home | History | Annotate | Download | only in goobj
      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package goobj implements reading of Go object files and archives.
      6 //
      7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
      8 // TODO(rsc): Decide the appropriate integer types for various fields.
      9 package goobj
     10 
     11 import (
     12 	"bufio"
     13 	"bytes"
     14 	"cmd/internal/objabi"
     15 	"errors"
     16 	"fmt"
     17 	"io"
     18 	"os"
     19 	"strconv"
     20 	"strings"
     21 )
     22 
// A Sym is a named symbol in an object file.
// The SymID is embedded, so its Name and Version fields are
// promoted to Sym.
type Sym struct {
	SymID                // symbol identifier (name and version)
	Kind  objabi.SymKind // kind of symbol
	DupOK bool           // are duplicate definitions okay?
	Size  int64          // size of corresponding data
	Type  SymID          // symbol for Go type information
	Data  Data           // memory image of symbol
	Reloc []Reloc        // relocations to apply to Data
	Func  *Func          // additional data for functions; set only when Kind is objabi.STEXT
}
     34 
     35 // A SymID - the combination of Name and Version - uniquely identifies
     36 // a symbol within a package.
     37 type SymID struct {
     38 	// Name is the name of a symbol.
     39 	Name string
     40 
     41 	// Version is zero for symbols with global visibility.
     42 	// Symbols with only file visibility (such as file-level static
     43 	// declarations in C) have a non-zero version distinguishing
     44 	// a symbol in one file from a symbol of the same name
     45 	// in another file
     46 	Version int64
     47 }
     48 
     49 func (s SymID) String() string {
     50 	if s.Version == 0 {
     51 		return s.Name
     52 	}
     53 	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
     54 }
     55 
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	Offset int64 // position of the first byte, as tracked by the reader's file offset
	Size   int64 // length of the data in bytes
}
     63 
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the containing Sym
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int64
	Size   int64
	Sym    SymID
	Add    int64

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type objabi.RelocType
}
     80 
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int64  // TODO(rsc): Define meaning.
	Offset int64  // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
     93 
// Func contains additional per-symbol information specific to functions.
// The PC-indexed tables are stored as Data references rather than
// decoded contents.
type Func struct {
	Args     int64      // size in bytes of argument frame: inputs and outputs
	Frame    int64      // size in bytes of local variable frame
	Leaf     bool       // function omits save of link register (ARM)
	NoSplit  bool       // function omits stack split prologue
	Var      []Var      // detail about local variables
	PCSP     Data       // PC → SP offset map
	PCFile   Data       // PC → file number map (index into File)
	PCLine   Data       // PC → line number map
	PCInline Data       // PC → inline tree index map
	PCData   []Data     // PC → runtime support data map
	FuncData []FuncData // non-PC-specific runtime support data
	File     []string   // paths indexed by PCFile
	InlTree  []InlinedCall // inlined-call nodes; presumably indexed by PCInline values (see InlinedCall)
}
    110 
    111 // TODO: Add PCData []byte and PCDataIter (similar to liblink).
    112 
// A FuncData is a single function-specific data value.
// It identifies the data by symbol and by an offset within that symbol.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
    118 
// An InlinedCall is a node in an InlTree.
// See cmd/internal/obj.InlTree for details.
type InlinedCall struct {
	Parent int64  // parent node; presumably an index into the same InlTree - confirm against cmd/internal/obj
	File   string // name of the file symbol recorded for the call
	Line   int64  // line number recorded for the call
	Func   SymID  // function symbol recorded for the call
}
    127 
// A Package is a parsed Go object file or archive defining a Go package.
type Package struct {
	ImportPath string          // import path denoting this package
	Imports    []string        // packages imported by this package
	SymRefs    []SymID         // list of symbol names and versions referred to by this package; reset for each object parsed
	Syms       []*Sym          // symbols defined by this package
	MaxVersion int64           // maximum Version in any SymID in Syms; incremented once per object parsed
	Arch       string          // architecture
	Native     []*NativeReader // native object data (e.g. ELF)
}
    138 
// A NativeReader gives access to the raw bytes of an archive member
// that is not a Go object file (native object data, e.g. ELF).
// Name is the member name from the archive header; the embedded
// io.ReaderAt reads the member's contents.
type NativeReader struct {
	Name string
	io.ReaderAt
}
    143 
// Magic byte sequences recognized by the reader, and the sentinel
// errors it reports.
var (
	archiveHeader = []byte("!<arch>\n") // first 8 bytes of an archive
	archiveMagic  = []byte("`\n")       // last 2 bytes of each 60-byte archive member header
	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errCorruptObject    = errors.New("corrupt object file")
	errNotObject        = errors.New("unrecognized object file format")
)
    154 
// An objReader is an object file reader.
// It reads through a buffered reader while tracking its own file
// offset, and remembers the first error encountered so that parsing
// code can defer error checks.
type objReader struct {
	p          *Package      // package being populated
	b          *bufio.Reader // buffered reader over f
	f          *os.File      // underlying file, used directly when seeking
	err        error         // first error recorded by the error method
	offset     int64         // file offset of the next unread byte
	dataOffset int64         // file offset handed out to the next Data by readData
	limit      int64         // file offset at which reads must stop
	tmp        [256]byte     // scratch space for headers, magic strings, and skipped bytes
	pkgprefix  string        // text substituted for `"".` in symbol names
}
    167 
    168 // init initializes r to read package p from f.
    169 func (r *objReader) init(f *os.File, p *Package) {
    170 	r.f = f
    171 	r.p = p
    172 	r.offset, _ = f.Seek(0, io.SeekCurrent)
    173 	r.limit, _ = f.Seek(0, io.SeekEnd)
    174 	f.Seek(r.offset, io.SeekStart)
    175 	r.b = bufio.NewReader(f)
    176 	r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
    177 }
    178 
    179 // error records that an error occurred.
    180 // It returns only the first error, so that an error
    181 // caused by an earlier error does not discard information
    182 // about the earlier error.
    183 func (r *objReader) error(err error) error {
    184 	if r.err == nil {
    185 		if err == io.EOF {
    186 			err = io.ErrUnexpectedEOF
    187 		}
    188 		r.err = err
    189 	}
    190 	// panic("corrupt") // useful for debugging
    191 	return r.err
    192 }
    193 
    194 // peek returns the next n bytes without advancing the reader.
    195 func (r *objReader) peek(n int) ([]byte, error) {
    196 	if r.err != nil {
    197 		return nil, r.err
    198 	}
    199 	if r.offset >= r.limit {
    200 		r.error(io.ErrUnexpectedEOF)
    201 		return nil, r.err
    202 	}
    203 	b, err := r.b.Peek(n)
    204 	if err != nil {
    205 		if err != bufio.ErrBufferFull {
    206 			r.error(err)
    207 		}
    208 	}
    209 	return b, err
    210 }
    211 
    212 // readByte reads and returns a byte from the input file.
    213 // On I/O error or EOF, it records the error but returns byte 0.
    214 // A sequence of 0 bytes will eventually terminate any
    215 // parsing state in the object file. In particular, it ends the
    216 // reading of a varint.
    217 func (r *objReader) readByte() byte {
    218 	if r.err != nil {
    219 		return 0
    220 	}
    221 	if r.offset >= r.limit {
    222 		r.error(io.ErrUnexpectedEOF)
    223 		return 0
    224 	}
    225 	b, err := r.b.ReadByte()
    226 	if err != nil {
    227 		if err == io.EOF {
    228 			err = io.ErrUnexpectedEOF
    229 		}
    230 		r.error(err)
    231 		b = 0
    232 	} else {
    233 		r.offset++
    234 	}
    235 	return b
    236 }
    237 
    238 // read reads exactly len(b) bytes from the input file.
    239 // If an error occurs, read returns the error but also
    240 // records it, so it is safe for callers to ignore the result
    241 // as long as delaying the report is not a problem.
    242 func (r *objReader) readFull(b []byte) error {
    243 	if r.err != nil {
    244 		return r.err
    245 	}
    246 	if r.offset+int64(len(b)) > r.limit {
    247 		return r.error(io.ErrUnexpectedEOF)
    248 	}
    249 	n, err := io.ReadFull(r.b, b)
    250 	r.offset += int64(n)
    251 	if err != nil {
    252 		return r.error(err)
    253 	}
    254 	return nil
    255 }
    256 
    257 // readInt reads a zigzag varint from the input file.
    258 func (r *objReader) readInt() int64 {
    259 	var u uint64
    260 
    261 	for shift := uint(0); ; shift += 7 {
    262 		if shift >= 64 {
    263 			r.error(errCorruptObject)
    264 			return 0
    265 		}
    266 		c := r.readByte()
    267 		u |= uint64(c&0x7F) << shift
    268 		if c&0x80 == 0 {
    269 			break
    270 		}
    271 	}
    272 
    273 	return int64(u>>1) ^ (int64(u) << 63 >> 63)
    274 }
    275 
    276 // readString reads a length-delimited string from the input file.
    277 func (r *objReader) readString() string {
    278 	n := r.readInt()
    279 	buf := make([]byte, n)
    280 	r.readFull(buf)
    281 	return string(buf)
    282 }
    283 
    284 // readSymID reads a SymID from the input file.
    285 func (r *objReader) readSymID() SymID {
    286 	i := r.readInt()
    287 	return r.p.SymRefs[i]
    288 }
    289 
    290 func (r *objReader) readRef() {
    291 	name, vers := r.readString(), r.readInt()
    292 
    293 	// In a symbol name in an object file, "". denotes the
    294 	// prefix for the package in which the object file has been found.
    295 	// Expand it.
    296 	name = strings.Replace(name, `"".`, r.pkgprefix, -1)
    297 
    298 	// An individual object file only records version 0 (extern) or 1 (static).
    299 	// To make static symbols unique across all files being read, we
    300 	// replace version 1 with the version corresponding to the current
    301 	// file number. The number is incremented on each call to parseObject.
    302 	if vers != 0 {
    303 		vers = r.p.MaxVersion
    304 	}
    305 	r.p.SymRefs = append(r.p.SymRefs, SymID{name, vers})
    306 }
    307 
    308 // readData reads a data reference from the input file.
    309 func (r *objReader) readData() Data {
    310 	n := r.readInt()
    311 	d := Data{Offset: r.dataOffset, Size: n}
    312 	r.dataOffset += n
    313 	return d
    314 }
    315 
    316 // skip skips n bytes in the input.
    317 func (r *objReader) skip(n int64) {
    318 	if n < 0 {
    319 		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
    320 	}
    321 	if n < int64(len(r.tmp)) {
    322 		// Since the data is so small, a just reading from the buffered
    323 		// reader is better than flushing the buffer and seeking.
    324 		r.readFull(r.tmp[:n])
    325 	} else if n <= int64(r.b.Buffered()) {
    326 		// Even though the data is not small, it has already been read.
    327 		// Advance the buffer instead of seeking.
    328 		for n > int64(len(r.tmp)) {
    329 			r.readFull(r.tmp[:])
    330 			n -= int64(len(r.tmp))
    331 		}
    332 		r.readFull(r.tmp[:n])
    333 	} else {
    334 		// Seek, giving up buffered data.
    335 		_, err := r.f.Seek(r.offset+n, io.SeekStart)
    336 		if err != nil {
    337 			r.error(err)
    338 		}
    339 		r.offset += n
    340 		r.b.Reset(r.f)
    341 	}
    342 }
    343 
    344 // Parse parses an object file or archive from f,
    345 // assuming that its import path is pkgpath.
    346 func Parse(f *os.File, pkgpath string) (*Package, error) {
    347 	if pkgpath == "" {
    348 		pkgpath = `""`
    349 	}
    350 	p := new(Package)
    351 	p.ImportPath = pkgpath
    352 
    353 	var rd objReader
    354 	rd.init(f, p)
    355 	err := rd.readFull(rd.tmp[:8])
    356 	if err != nil {
    357 		if err == io.EOF {
    358 			err = io.ErrUnexpectedEOF
    359 		}
    360 		return nil, err
    361 	}
    362 
    363 	switch {
    364 	default:
    365 		return nil, errNotObject
    366 
    367 	case bytes.Equal(rd.tmp[:8], archiveHeader):
    368 		if err := rd.parseArchive(); err != nil {
    369 			return nil, err
    370 		}
    371 	case bytes.Equal(rd.tmp[:8], goobjHeader):
    372 		if err := rd.parseObject(goobjHeader); err != nil {
    373 			return nil, err
    374 		}
    375 	}
    376 
    377 	return p, nil
    378 }
    379 
    380 // trimSpace removes trailing spaces from b and returns the corresponding string.
    381 // This effectively parses the form used in archive headers.
    382 func trimSpace(b []byte) string {
    383 	return string(bytes.TrimRight(b, " "))
    384 }
    385 
    386 // parseArchive parses a Unix archive of Go object files.
    387 func (r *objReader) parseArchive() error {
    388 	for r.offset < r.limit {
    389 		if err := r.readFull(r.tmp[:60]); err != nil {
    390 			return err
    391 		}
    392 		data := r.tmp[:60]
    393 
    394 		// Each file is preceded by this text header (slice indices in first column):
    395 		//	 0:16	name
    396 		//	16:28 date
    397 		//	28:34 uid
    398 		//	34:40 gid
    399 		//	40:48 mode
    400 		//	48:58 size
    401 		//	58:60 magic - `\n
    402 		// We only care about name, size, and magic.
    403 		// The fields are space-padded on the right.
    404 		// The size is in decimal.
    405 		// The file data - size bytes - follows the header.
    406 		// Headers are 2-byte aligned, so if size is odd, an extra padding
    407 		// byte sits between the file data and the next header.
    408 		// The file data that follows is padded to an even number of bytes:
    409 		// if size is odd, an extra padding byte is inserted betw the next header.
    410 		if len(data) < 60 {
    411 			return errTruncatedArchive
    412 		}
    413 		if !bytes.Equal(data[58:60], archiveMagic) {
    414 			return errCorruptArchive
    415 		}
    416 		name := trimSpace(data[0:16])
    417 		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
    418 		if err != nil {
    419 			return errCorruptArchive
    420 		}
    421 		data = data[60:]
    422 		fsize := size + size&1
    423 		if fsize < 0 || fsize < size {
    424 			return errCorruptArchive
    425 		}
    426 		switch name {
    427 		case "__.PKGDEF":
    428 			r.skip(size)
    429 		default:
    430 			oldLimit := r.limit
    431 			r.limit = r.offset + size
    432 
    433 			p, err := r.peek(8)
    434 			if err != nil {
    435 				return err
    436 			}
    437 			if bytes.Equal(p, goobjHeader) {
    438 				if err := r.parseObject(nil); err != nil {
    439 					return fmt.Errorf("parsing archive member %q: %v", name, err)
    440 				}
    441 			} else {
    442 				r.p.Native = append(r.p.Native, &NativeReader{
    443 					Name:     name,
    444 					ReaderAt: io.NewSectionReader(r.f, r.offset, size),
    445 				})
    446 			}
    447 
    448 			r.skip(r.limit - r.offset)
    449 			r.limit = oldLimit
    450 		}
    451 		if size&1 != 0 {
    452 			r.skip(1)
    453 		}
    454 	}
    455 	return nil
    456 }
    457 
    458 // parseObject parses a single Go object file.
    459 // The prefix is the bytes already read from the file,
    460 // typically in order to detect that this is an object file.
    461 // The object file consists of a textual header ending in "\n!\n"
    462 // and then the part we want to parse begins.
    463 // The format of that part is defined in a comment at the top
    464 // of src/liblink/objfile.c.
    465 func (r *objReader) parseObject(prefix []byte) error {
    466 	r.p.MaxVersion++
    467 	h := make([]byte, 0, 256)
    468 	h = append(h, prefix...)
    469 	var c1, c2, c3 byte
    470 	for {
    471 		c1, c2, c3 = c2, c3, r.readByte()
    472 		h = append(h, c3)
    473 		// The new export format can contain 0 bytes.
    474 		// Don't consider them errors, only look for r.err != nil.
    475 		if r.err != nil {
    476 			return errCorruptObject
    477 		}
    478 		if c1 == '\n' && c2 == '!' && c3 == '\n' {
    479 			break
    480 		}
    481 	}
    482 
    483 	hs := strings.Fields(string(h))
    484 	if len(hs) >= 4 {
    485 		r.p.Arch = hs[3]
    486 	}
    487 	// TODO: extract OS + build ID if/when we need it
    488 
    489 	r.readFull(r.tmp[:8])
    490 	if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) {
    491 		return r.error(errCorruptObject)
    492 	}
    493 
    494 	b := r.readByte()
    495 	if b != 1 {
    496 		return r.error(errCorruptObject)
    497 	}
    498 
    499 	// Direct package dependencies.
    500 	for {
    501 		s := r.readString()
    502 		if s == "" {
    503 			break
    504 		}
    505 		r.p.Imports = append(r.p.Imports, s)
    506 	}
    507 
    508 	r.p.SymRefs = []SymID{{"", 0}}
    509 	for {
    510 		if b := r.readByte(); b != 0xfe {
    511 			if b != 0xff {
    512 				return r.error(errCorruptObject)
    513 			}
    514 			break
    515 		}
    516 
    517 		r.readRef()
    518 	}
    519 
    520 	dataLength := r.readInt()
    521 	r.readInt() // n relocations - ignore
    522 	r.readInt() // n pcdata - ignore
    523 	r.readInt() // n autom - ignore
    524 	r.readInt() // n funcdata - ignore
    525 	r.readInt() // n files - ignore
    526 
    527 	r.dataOffset = r.offset
    528 	r.skip(dataLength)
    529 
    530 	// Symbols.
    531 	for {
    532 		if b := r.readByte(); b != 0xfe {
    533 			if b != 0xff {
    534 				return r.error(errCorruptObject)
    535 			}
    536 			break
    537 		}
    538 
    539 		typ := r.readByte()
    540 		s := &Sym{SymID: r.readSymID()}
    541 		r.p.Syms = append(r.p.Syms, s)
    542 		s.Kind = objabi.SymKind(typ)
    543 		flags := r.readInt()
    544 		s.DupOK = flags&1 != 0
    545 		s.Size = r.readInt()
    546 		s.Type = r.readSymID()
    547 		s.Data = r.readData()
    548 		s.Reloc = make([]Reloc, r.readInt())
    549 		for i := range s.Reloc {
    550 			rel := &s.Reloc[i]
    551 			rel.Offset = r.readInt()
    552 			rel.Size = r.readInt()
    553 			rel.Type = objabi.RelocType(r.readInt())
    554 			rel.Add = r.readInt()
    555 			rel.Sym = r.readSymID()
    556 		}
    557 
    558 		if s.Kind == objabi.STEXT {
    559 			f := new(Func)
    560 			s.Func = f
    561 			f.Args = r.readInt()
    562 			f.Frame = r.readInt()
    563 			flags := r.readInt()
    564 			f.Leaf = flags&(1<<0) != 0
    565 			f.NoSplit = r.readInt() != 0
    566 			f.Var = make([]Var, r.readInt())
    567 			for i := range f.Var {
    568 				v := &f.Var[i]
    569 				v.Name = r.readSymID().Name
    570 				v.Offset = r.readInt()
    571 				v.Kind = r.readInt()
    572 				v.Type = r.readSymID()
    573 			}
    574 
    575 			f.PCSP = r.readData()
    576 			f.PCFile = r.readData()
    577 			f.PCLine = r.readData()
    578 			f.PCInline = r.readData()
    579 			f.PCData = make([]Data, r.readInt())
    580 			for i := range f.PCData {
    581 				f.PCData[i] = r.readData()
    582 			}
    583 			f.FuncData = make([]FuncData, r.readInt())
    584 			for i := range f.FuncData {
    585 				f.FuncData[i].Sym = r.readSymID()
    586 			}
    587 			for i := range f.FuncData {
    588 				f.FuncData[i].Offset = int64(r.readInt()) // TODO
    589 			}
    590 			f.File = make([]string, r.readInt())
    591 			for i := range f.File {
    592 				f.File[i] = r.readSymID().Name
    593 			}
    594 			f.InlTree = make([]InlinedCall, r.readInt())
    595 			for i := range f.InlTree {
    596 				f.InlTree[i].Parent = r.readInt()
    597 				f.InlTree[i].File = r.readSymID().Name
    598 				f.InlTree[i].Line = r.readInt()
    599 				f.InlTree[i].Func = r.readSymID()
    600 			}
    601 		}
    602 	}
    603 
    604 	r.readFull(r.tmp[:7])
    605 	if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) {
    606 		return r.error(errCorruptObject)
    607 	}
    608 
    609 	return nil
    610 }
    611 
    612 func (r *Reloc) String(insnOffset uint64) string {
    613 	delta := r.Offset - int64(insnOffset)
    614 	s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type)
    615 	if r.Sym.Name != "" {
    616 		if r.Add != 0 {
    617 			return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add)
    618 		}
    619 		return fmt.Sprintf("%s:%s", s, r.Sym.Name)
    620 	}
    621 	if r.Add != 0 {
    622 		return fmt.Sprintf("%s:%d", s, r.Add)
    623 	}
    624 	return s
    625 }
    626