Home | History | Annotate | Download | only in ld
      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package ld
      6 
      7 // Reading of Go object files.
      8 //
      9 // Originally, Go object files were Plan 9 object files, but no longer.
     10 // Now they are more like standard object files, in that each symbol is defined
     11 // by an associated memory image (bytes) and a list of relocations to apply
     12 // during linking. We do not (yet?) use a standard file format, however.
     13 // For now, the format is chosen to be as simple as possible to read and write.
     14 // It may change for reasons of efficiency, or we may even switch to a
     15 // standard file format if there are compelling benefits to doing so.
     16 // See golang.org/s/go13linker for more background.
     17 //
     18 // The file format is:
     19 //
     20 //	- magic header: "\x00\x00go17ld"
     21 //	- byte 1 - version number
     22 //	- sequence of strings giving dependencies (imported packages)
     23 //	- empty string (marks end of sequence)
     24 //	- sequence of symbol references used by the defined symbols
     25 //	- byte 0xff (marks end of sequence)
     26 //	- sequence of integer lengths:
     27 //		- total data length
     28 //		- total number of relocations
     29 //		- total number of pcdata
     30 //		- total number of automatics
     31 //		- total number of funcdata
     32 //		- total number of files
     33 //	- data, the content of the defined symbols
     34 //	- sequence of defined symbols
     35 //	- byte 0xff (marks end of sequence)
     36 //	- magic footer: "\xff\xffgo17ld"
     37 //
     38 // All integers are stored in a zigzag varint format.
     39 // See golang.org/s/go12symtab for a definition.
     40 //
     41 // Data blocks and strings are both stored as an integer
     42 // followed by that many bytes.
     43 //
     44 // A symbol reference is a string name followed by a version.
     45 //
     46 // A symbol points to other symbols using an index into the symbol
     47 // reference sequence. Index 0 corresponds to a nil Object* pointer.
     48 // In the symbol layout described below "symref index" stands for this
     49 // index.
     50 //
     51 // Each symbol is laid out as the following fields (taken from Object*):
     52 //
     53 //	- byte 0xfe (sanity check for synchronization)
     54 //	- type [int]
     55 //	- name & version [symref index]
     56 //	- flags [int]
     57 //		1<<0 dupok
     58 //		1<<1 local
     59 //		1<<2 add to typelink table
     60 //	- size [int]
     61 //	- gotype [symref index]
     62 //	- p [data block]
     63 //	- nr [int]
     64 //	- r [nr relocations, sorted by off]
     65 //
     66 // If type == STEXT, there are a few more fields:
     67 //
     68 //	- args [int]
     69 //	- locals [int]
     70 //	- nosplit [int]
     71 //	- flags [int]
     72 //		1<<0 leaf
     73 //		1<<1 C function
     74 //		1<<2 function may call reflect.Type.Method
     75 //	- nlocal [int]
     76 //	- local [nlocal automatics]
     77 //	- pcln [pcln table]
     78 //
     79 // Each relocation has the encoding:
     80 //
     81 //	- off [int]
     82 //	- siz [int]
     83 //	- type [int]
     84 //	- add [int]
     85 //	- sym [symref index]
     86 //
     87 // Each local has the encoding:
     88 //
     89 //	- asym [symref index]
     90 //	- offset [int]
     91 //	- type [int]
     92 //	- gotype [symref index]
     93 //
     94 // The pcln table has the encoding:
     95 //
     96 //	- pcsp [data block]
     97 //	- pcfile [data block]
     98 //	- pcline [data block]
     99 //	- npcdata [int]
    100 //	- pcdata [npcdata data blocks]
    101 //	- nfuncdata [int]
    102 //	- funcdata [nfuncdata symref index]
    103 //	- funcdatasym [nfuncdata ints]
    104 //	- nfile [int]
    105 //	- file [nfile symref index]
    106 //
    107 // The file layout and meaning of type integers are architecture-independent.
    108 //
    109 // TODO(rsc): The file format is good for a first pass but needs work.
    110 //	- There are SymID in the object file that should really just be strings.
    111 
    112 import (
    113 	"bufio"
    114 	"bytes"
    115 	"cmd/internal/bio"
    116 	"cmd/internal/dwarf"
    117 	"cmd/internal/obj"
    118 	"crypto/sha1"
    119 	"encoding/base64"
    120 	"io"
    121 	"log"
    122 	"strconv"
    123 	"strings"
    124 )
    125 
// Magic strings that bracket the contents of a Go object file.
// loadObjFile requires startmagic as the first 8 bytes it reads and
// endmagic as the 8 bytes that follow the last defined symbol; a
// mismatch in either place is fatal.
const (
	startmagic = "\x00\x00go17ld"
	endmagic   = "\xff\xffgo17ld"
)
    130 
// emptyPkg is the placeholder package prefix found in object files.
// readSymName and patchDWARFName replace each occurrence with the
// prefix form of the library's package path followed by a dot.
var emptyPkg = []byte(`"".`)
    132 
// objReader reads Go object files.
// LoadObjFile creates a fresh objReader for each object file it loads.
type objReader struct {
	rd              *bufio.Reader // buffered input the object file is decoded from
	ctxt            *Link         // link context; its Syms table resolves symbol references
	lib             *Library      // library being loaded; its Pkg replaces the "". placeholder
	pn              string        // name passed to LoadObjFile, used as a prefix in diagnostics
	dupSym          *Symbol       // scratch symbol that readSym decodes duplicate definitions into
	localSymVersion int           // version substituted by readRef for references stored with version 1

	// rdBuf is used by readString and readSymName as scratch for reading strings.
	rdBuf []byte

	// List of symbol references for the file being read.
	// loadObjFile seeds refs with a nil entry at index 0 and readRef
	// appends to it; readSymIndex resolves indexes against it.
	//
	// The remaining slices are allocated in one go by readSlices from the
	// totals stored in the file. readData and readSym then hand out
	// prefixes of them to the symbols being decoded.
	refs        []*Symbol
	data        []byte
	reloc       []Reloc
	pcdata      []Pcdata
	autom       []Auto
	funcdata    []*Symbol
	funcdataoff []int64
	file        []*Symbol
}
    155 
    156 func LoadObjFile(ctxt *Link, f *bio.Reader, lib *Library, length int64, pn string) {
    157 
    158 	start := f.Offset()
    159 	r := &objReader{
    160 		rd:              f.Reader,
    161 		lib:             lib,
    162 		ctxt:            ctxt,
    163 		pn:              pn,
    164 		dupSym:          &Symbol{Name: ".dup"},
    165 		localSymVersion: ctxt.Syms.IncVersion(),
    166 	}
    167 	r.loadObjFile()
    168 	if f.Offset() != start+length {
    169 		log.Fatalf("%s: unexpected end at %d, want %d", pn, f.Offset(), start+length)
    170 	}
    171 }
    172 
    173 func (r *objReader) loadObjFile() {
    174 	pkg := pathtoprefix(r.lib.Pkg)
    175 
    176 	// Magic header
    177 	var buf [8]uint8
    178 	r.readFull(buf[:])
    179 	if string(buf[:]) != startmagic {
    180 		log.Fatalf("%s: invalid file start %x %x %x %x %x %x %x %x", r.pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
    181 	}
    182 
    183 	// Version
    184 	c, err := r.rd.ReadByte()
    185 	if err != nil || c != 1 {
    186 		log.Fatalf("%s: invalid file version number %d", r.pn, c)
    187 	}
    188 
    189 	// Autolib
    190 	for {
    191 		lib := r.readString()
    192 		if lib == "" {
    193 			break
    194 		}
    195 		l := addlib(r.ctxt, pkg, r.pn, lib)
    196 		if l != nil {
    197 			r.lib.imports = append(r.lib.imports, l)
    198 		}
    199 	}
    200 
    201 	// Symbol references
    202 	r.refs = []*Symbol{nil} // zeroth ref is nil
    203 	for {
    204 		c, err := r.rd.Peek(1)
    205 		if err != nil {
    206 			log.Fatalf("%s: peeking: %v", r.pn, err)
    207 		}
    208 		if c[0] == 0xff {
    209 			r.rd.ReadByte()
    210 			break
    211 		}
    212 		r.readRef()
    213 	}
    214 
    215 	// Lengths
    216 	r.readSlices()
    217 
    218 	// Data section
    219 	r.readFull(r.data)
    220 
    221 	// Defined symbols
    222 	for {
    223 		c, err := r.rd.Peek(1)
    224 		if err != nil {
    225 			log.Fatalf("%s: peeking: %v", r.pn, err)
    226 		}
    227 		if c[0] == 0xff {
    228 			break
    229 		}
    230 		r.readSym()
    231 	}
    232 
    233 	// Magic footer
    234 	buf = [8]uint8{}
    235 	r.readFull(buf[:])
    236 	if string(buf[:]) != endmagic {
    237 		log.Fatalf("%s: invalid file end", r.pn)
    238 	}
    239 }
    240 
    241 func (r *objReader) readSlices() {
    242 	n := r.readInt()
    243 	r.data = make([]byte, n)
    244 	n = r.readInt()
    245 	r.reloc = make([]Reloc, n)
    246 	n = r.readInt()
    247 	r.pcdata = make([]Pcdata, n)
    248 	n = r.readInt()
    249 	r.autom = make([]Auto, n)
    250 	n = r.readInt()
    251 	r.funcdata = make([]*Symbol, n)
    252 	r.funcdataoff = make([]int64, n)
    253 	n = r.readInt()
    254 	r.file = make([]*Symbol, n)
    255 }
    256 
// Symbols are prefixed so their content doesn't get confused with the magic footer.
// readSym and readRef both expect this byte at the start of every record
// and abort with an "out of sync" error when it is missing.
const symPrefix = 0xfe
    259 
// readSym decodes one defined symbol and merges it into the symbol that
// the record's symref index resolves to.
//
// The fixed fields (type, name, flags, size, gotype, data, relocation
// count) are read first. If the target symbol already has a definition,
// one of four things happens:
//
//   - the new definition is an empty SDATA/SBSS/SNOPTRBSS: only size and
//     gotype are merged and the function returns;
//   - the existing definition is an empty SDATA/SBSS/SNOPTRBSS: it is
//     overwritten by the new one;
//   - neither side allows duplicates and the existing symbol is not BSS:
//     the link is aborted;
//   - otherwise, when the existing symbol has data, the new body is
//     decoded into the scratch symbol r.dupSym so the rest of the record
//     is still consumed without touching the existing definition.
//
// Relocations and, for STEXT symbols, the function metadata are stored in
// prefixes of the slabs allocated by readSlices. For a duplicate the
// slabs are not advanced, so the storage is handed out again later.
func (r *objReader) readSym() {
	if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
		log.Fatalln("readSym out of sync")
	}
	t := obj.SymKind(r.readInt())
	s := r.readSymIndex()
	flags := r.readInt()
	dupok := flags&1 != 0
	local := flags&2 != 0
	makeTypelink := flags&4 != 0
	size := r.readInt()
	typ := r.readSymIndex()
	data := r.readData()
	nreloc := r.readInt()
	pkg := pathtoprefix(r.lib.Pkg)
	isdup := false

	// dup remembers the real symbol when s is redirected to r.dupSym.
	var dup *Symbol
	if s.Type != 0 && s.Type != obj.SXREF {
		// s has been defined before.
		if (t == obj.SDATA || t == obj.SBSS || t == obj.SNOPTRBSS) && len(data) == 0 && nreloc == 0 {
			// The new definition carries no content: keep the existing
			// one, taking only a larger size and a missing gotype.
			if s.Size < int64(size) {
				s.Size = int64(size)
			}
			if typ != nil && s.Gotype == nil {
				s.Gotype = typ
			}
			return
		}

		if (s.Type == obj.SDATA || s.Type == obj.SBSS || s.Type == obj.SNOPTRBSS) && len(s.P) == 0 && len(s.R) == 0 {
			// The existing definition carries no content: replace it.
			goto overwrite
		}
		if s.Type != obj.SBSS && s.Type != obj.SNOPTRBSS && !dupok && !s.Attr.DuplicateOK() {
			log.Fatalf("duplicate symbol %s (types %d and %d) in %s and %s", s.Name, s.Type, t, s.File, r.pn)
		}
		if len(s.P) > 0 {
			// Decode the rest of this record into the scratch symbol.
			dup = s
			s = r.dupSym
			isdup = true
		}
	}

overwrite:
	s.File = pkg
	if dupok {
		s.Attr |= AttrDuplicateOK
	}
	if t == obj.SXREF {
		log.Fatalf("bad sxref")
	}
	if t == 0 {
		log.Fatalf("missing type for %s in %s", s.Name, r.pn)
	}
	// A BSS definition does not change the type of a symbol that is
	// already SRODATA or SNOPTRBSS.
	if t == obj.SBSS && (s.Type == obj.SRODATA || s.Type == obj.SNOPTRBSS) {
		t = s.Type
	}
	s.Type = t
	if s.Size < int64(size) {
		s.Size = int64(size)
	}
	s.Attr.Set(AttrLocal, local)
	s.Attr.Set(AttrMakeTypelink, makeTypelink)
	if typ != nil {
		s.Gotype = typ
	}
	if isdup && typ != nil { // if bss sym defined multiple times, take type from any one def
		dup.Gotype = typ
	}
	s.P = data
	if nreloc > 0 {
		// Take the relocation storage from the front of the slab. The
		// capacity is capped so appends cannot spill into the next
		// symbol's relocations.
		s.R = r.reloc[:nreloc:nreloc]
		if !isdup {
			r.reloc = r.reloc[nreloc:]
		}

		for i := 0; i < nreloc; i++ {
			s.R[i] = Reloc{
				Off:  r.readInt32(),
				Siz:  r.readUint8(),
				Type: obj.RelocType(r.readInt32()),
				Add:  r.readInt64(),
				Sym:  r.readSymIndex(),
			}
		}
	}

	if s.Type == obj.STEXT {
		s.FuncInfo = new(FuncInfo)
		pc := s.FuncInfo

		pc.Args = r.readInt32()
		pc.Locals = r.readInt32()
		if r.readUint8() != 0 {
			s.Attr |= AttrNoSplit
		}
		// Only bit 2 of the function flags is consulted here.
		flags := r.readInt()
		if flags&(1<<2) != 0 {
			s.Attr |= AttrReflectMethod
		}
		n := r.readInt()
		pc.Autom = r.autom[:n:n]
		if !isdup {
			r.autom = r.autom[n:]
		}

		for i := 0; i < n; i++ {
			pc.Autom[i] = Auto{
				Asym:    r.readSymIndex(),
				Aoffset: r.readInt32(),
				Name:    r.readInt16(),
				Gotype:  r.readSymIndex(),
			}
		}

		pc.Pcsp.P = r.readData()
		pc.Pcfile.P = r.readData()
		pc.Pcline.P = r.readData()
		n = r.readInt()
		pc.Pcdata = r.pcdata[:n:n]
		if !isdup {
			r.pcdata = r.pcdata[n:]
		}
		for i := 0; i < n; i++ {
			pc.Pcdata[i].P = r.readData()
		}
		n = r.readInt()
		pc.Funcdata = r.funcdata[:n:n]
		pc.Funcdataoff = r.funcdataoff[:n:n]
		if !isdup {
			r.funcdata = r.funcdata[n:]
			r.funcdataoff = r.funcdataoff[n:]
		}
		// All funcdata symbols are stored first, followed by all offsets.
		for i := 0; i < n; i++ {
			pc.Funcdata[i] = r.readSymIndex()
		}
		for i := 0; i < n; i++ {
			pc.Funcdataoff[i] = r.readInt64()
		}
		n = r.readInt()
		pc.File = r.file[:n:n]
		if !isdup {
			r.file = r.file[n:]
		}
		for i := 0; i < n; i++ {
			pc.File[i] = r.readSymIndex()
		}

		if !dupok {
			// NOTE(review): if isdup is set here, s is the shared scratch
			// symbol rather than the real one; confirm that is intended.
			if s.Attr.OnList() {
				log.Fatalf("symbol %s listed multiple times", s.Name)
			}
			s.Attr |= AttrOnList
			r.lib.textp = append(r.lib.textp, s)
		} else {
			// there may be a dup in another package
			// put into a temp list and add to text later
			if !isdup {
				r.lib.dupTextSyms = append(r.lib.dupTextSyms, s)
			} else {
				r.lib.dupTextSyms = append(r.lib.dupTextSyms, dup)
			}
		}
	}
	if s.Type == obj.SDWARFINFO {
		r.patchDWARFName(s)
	}
}
    427 
    428 func (r *objReader) patchDWARFName(s *Symbol) {
    429 	// This is kind of ugly. Really the package name should not
    430 	// even be included here.
    431 	if s.Size < 1 || s.P[0] != dwarf.DW_ABRV_FUNCTION {
    432 		return
    433 	}
    434 	e := bytes.IndexByte(s.P, 0)
    435 	if e == -1 {
    436 		return
    437 	}
    438 	p := bytes.Index(s.P[:e], emptyPkg)
    439 	if p == -1 {
    440 		return
    441 	}
    442 	pkgprefix := []byte(pathtoprefix(r.lib.Pkg) + ".")
    443 	patched := bytes.Replace(s.P[:e], emptyPkg, pkgprefix, -1)
    444 
    445 	s.P = append(patched, s.P[e:]...)
    446 	delta := int64(len(s.P)) - s.Size
    447 	s.Size = int64(len(s.P))
    448 	for i := range s.R {
    449 		r := &s.R[i]
    450 		if r.Off > int32(e) {
    451 			r.Off += int32(delta)
    452 		}
    453 	}
    454 }
    455 
    456 func (r *objReader) readFull(b []byte) {
    457 	_, err := io.ReadFull(r.rd, b)
    458 	if err != nil {
    459 		log.Fatalf("%s: error reading %s", r.pn, err)
    460 	}
    461 }
    462 
    463 func (r *objReader) readRef() {
    464 	if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
    465 		log.Fatalf("readSym out of sync")
    466 	}
    467 	name := r.readSymName()
    468 	v := r.readInt()
    469 	if v != 0 && v != 1 {
    470 		log.Fatalf("invalid symbol version %d", v)
    471 	}
    472 	if v == 1 {
    473 		v = r.localSymVersion
    474 	}
    475 	s := r.ctxt.Syms.Lookup(name, v)
    476 	r.refs = append(r.refs, s)
    477 
    478 	if s == nil || v != 0 {
    479 		return
    480 	}
    481 	if s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 {
    482 		x, err := strconv.ParseUint(s.Name[5:], 16, 64)
    483 		if err != nil {
    484 			log.Panicf("failed to parse $-symbol %s: %v", s.Name, err)
    485 		}
    486 		s.Type = obj.SRODATA
    487 		s.Attr |= AttrLocal
    488 		switch s.Name[:5] {
    489 		case "$f32.":
    490 			if uint64(uint32(x)) != x {
    491 				log.Panicf("$-symbol %s too large: %d", s.Name, x)
    492 			}
    493 			Adduint32(r.ctxt, s, uint32(x))
    494 		case "$f64.", "$i64.":
    495 			Adduint64(r.ctxt, s, x)
    496 		default:
    497 			log.Panicf("unrecognized $-symbol: %s", s.Name)
    498 		}
    499 		s.Attr.Set(AttrReachable, false)
    500 	}
    501 	if strings.HasPrefix(s.Name, "runtime.gcbits.") {
    502 		s.Attr |= AttrLocal
    503 	}
    504 }
    505 
    506 func (r *objReader) readInt64() int64 {
    507 	uv := uint64(0)
    508 	for shift := uint(0); ; shift += 7 {
    509 		if shift >= 64 {
    510 			log.Fatalf("corrupt input")
    511 		}
    512 		c, err := r.rd.ReadByte()
    513 		if err != nil {
    514 			log.Fatalln("error reading input: ", err)
    515 		}
    516 		uv |= uint64(c&0x7F) << shift
    517 		if c&0x80 == 0 {
    518 			break
    519 		}
    520 	}
    521 
    522 	return int64(uv>>1) ^ (int64(uv<<63) >> 63)
    523 }
    524 
    525 func (r *objReader) readInt() int {
    526 	n := r.readInt64()
    527 	if int64(int(n)) != n {
    528 		log.Panicf("%v out of range for int", n)
    529 	}
    530 	return int(n)
    531 }
    532 
    533 func (r *objReader) readInt32() int32 {
    534 	n := r.readInt64()
    535 	if int64(int32(n)) != n {
    536 		log.Panicf("%v out of range for int32", n)
    537 	}
    538 	return int32(n)
    539 }
    540 
    541 func (r *objReader) readInt16() int16 {
    542 	n := r.readInt64()
    543 	if int64(int16(n)) != n {
    544 		log.Panicf("%v out of range for int16", n)
    545 	}
    546 	return int16(n)
    547 }
    548 
    549 func (r *objReader) readUint8() uint8 {
    550 	n := r.readInt64()
    551 	if int64(uint8(n)) != n {
    552 		log.Panicf("%v out of range for uint8", n)
    553 	}
    554 	return uint8(n)
    555 }
    556 
    557 func (r *objReader) readString() string {
    558 	n := r.readInt()
    559 	if cap(r.rdBuf) < n {
    560 		r.rdBuf = make([]byte, 2*n)
    561 	}
    562 	r.readFull(r.rdBuf[:n])
    563 	return string(r.rdBuf[:n])
    564 }
    565 
    566 func (r *objReader) readData() []byte {
    567 	n := r.readInt()
    568 	p := r.data[:n:n]
    569 	r.data = r.data[n:]
    570 	return p
    571 }
    572 
// readSymName reads a symbol name, replacing all "". with pkg.
//
// The name is length-prefixed. Where possible it is examined in place via
// Peek and only consumed once the adjusted name has been built; names too
// long for the reader's buffer are read into a fresh slice instead.
// When building or linking against shared libraries, long "type." names
// (other than "type.runtime.") are replaced by a short hash-based name.
func (r *objReader) readSymName() string {
	pkg := pathtoprefix(r.lib.Pkg)
	n := r.readInt()
	if n == 0 {
		// NOTE(review): an empty name consumes one more integer here, and
		// readRef then reads the version itself; confirm the writer
		// really emits that extra integer for empty names.
		r.readInt64()
		return ""
	}
	if cap(r.rdBuf) < n {
		r.rdBuf = make([]byte, 2*n)
	}
	// origName aliases the reader's internal buffer when Peek succeeds,
	// so nothing may be read from r.rd until we are done with it.
	origName, err := r.rd.Peek(n)
	if err == bufio.ErrBufferFull {
		// Long symbol names are rare but exist. One source is type
		// symbols for types with long string forms. See #15104.
		origName = make([]byte, n)
		r.readFull(origName)
	} else if err != nil {
		log.Fatalf("%s: error reading symbol: %v", r.pn, err)
	}
	// adjName accumulates the rewritten name in the scratch buffer.
	adjName := r.rdBuf[:0]
	for {
		i := bytes.Index(origName, emptyPkg)
		if i == -1 {
			// No further placeholders: append the tail and finish.
			s := string(append(adjName, origName...))
			// Read past the peeked origName, now that we're done with it,
			// using the rdBuf (also no longer used) as the scratch space.
			// TODO: use bufio.Reader.Discard if available instead?
			if err == nil {
				r.readFull(r.rdBuf[:n])
			}
			r.rdBuf = adjName[:0] // in case 2*n wasn't enough

			if Buildmode == BuildmodeShared || *FlagLinkshared {
				// These types are included in the symbol
				// table when dynamically linking. To keep
				// binary size down, we replace the names
				// with SHA-1 prefixes.
				//
				// Keep the type.. prefix, which parts of the
				// linker (like the DWARF generator) know means
				// the symbol is not decodable.
				//
				// Leave type.runtime. symbols alone, because
				// other parts of the linker manipulate them,
				// and also symbols whose names would not be
				// shortened by this process.
				if len(s) > 14 && strings.HasPrefix(s, "type.") && !strings.HasPrefix(s, "type.runtime.") {
					hash := sha1.Sum([]byte(s))
					prefix := "type."
					if s[5] == '.' {
						prefix = "type.."
					}
					// Only the first 6 bytes of the hash are kept.
					s = prefix + base64.StdEncoding.EncodeToString(hash[:6])
				}
			}
			return s
		}
		// Copy everything up to the placeholder, then the package
		// prefix and a dot in its place, and continue after it.
		adjName = append(adjName, origName[:i]...)
		adjName = append(adjName, pkg...)
		adjName = append(adjName, '.')
		origName = origName[i+len(emptyPkg):]
	}
}
    637 
    638 // Reads the index of a symbol reference and resolves it to a symbol
    639 func (r *objReader) readSymIndex() *Symbol {
    640 	i := r.readInt()
    641 	return r.refs[i]
    642 }
    643