1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package goobj implements reading of Go object files and archives. 6 // 7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) 8 // TODO(rsc): Decide the appropriate integer types for various fields. 9 // TODO(rsc): Write tests. (File format still up in the air a little.) 10 package goobj 11 12 import ( 13 "bufio" 14 "bytes" 15 "cmd/internal/obj" 16 "errors" 17 "fmt" 18 "io" 19 "strconv" 20 "strings" 21 ) 22 23 // A SymKind describes the kind of memory represented by a symbol. 24 type SymKind int 25 26 // This list is taken from include/link.h. 27 28 // Defined SymKind values. 29 // TODO(rsc): Give idiomatic Go names. 30 // TODO(rsc): Reduce the number of symbol types in the object files. 31 const ( 32 _ SymKind = iota 33 34 // readonly, executable 35 STEXT SymKind = obj.STEXT 36 SELFRXSECT SymKind = obj.SELFRXSECT 37 38 // readonly, non-executable 39 STYPE SymKind = obj.STYPE 40 SSTRING SymKind = obj.SSTRING 41 SGOSTRING SymKind = obj.SGOSTRING 42 SGOFUNC SymKind = obj.SGOFUNC 43 SRODATA SymKind = obj.SRODATA 44 SFUNCTAB SymKind = obj.SFUNCTAB 45 STYPELINK SymKind = obj.STYPELINK 46 SSYMTAB SymKind = obj.SSYMTAB // TODO: move to unmapped section 47 SPCLNTAB SymKind = obj.SPCLNTAB 48 SELFROSECT SymKind = obj.SELFROSECT 49 50 // writable, non-executable 51 SMACHOPLT SymKind = obj.SMACHOPLT 52 SELFSECT SymKind = obj.SELFSECT 53 SMACHO SymKind = obj.SMACHO // Mach-O __nl_symbol_ptr 54 SMACHOGOT SymKind = obj.SMACHOGOT 55 SWINDOWS SymKind = obj.SWINDOWS 56 SELFGOT SymKind = obj.SELFGOT 57 SNOPTRDATA SymKind = obj.SNOPTRDATA 58 SINITARR SymKind = obj.SINITARR 59 SDATA SymKind = obj.SDATA 60 SBSS SymKind = obj.SBSS 61 SNOPTRBSS SymKind = obj.SNOPTRBSS 62 STLSBSS SymKind = obj.STLSBSS 63 64 // not mapped 65 SXREF SymKind = obj.SXREF 66 SMACHOSYMSTR SymKind = obj.SMACHOSYMSTR 67 
SMACHOSYMTAB SymKind = obj.SMACHOSYMTAB 68 SMACHOINDIRECTPLT SymKind = obj.SMACHOINDIRECTPLT 69 SMACHOINDIRECTGOT SymKind = obj.SMACHOINDIRECTGOT 70 SFILE SymKind = obj.SFILE 71 SFILEPATH SymKind = obj.SFILEPATH 72 SCONST SymKind = obj.SCONST 73 SDYNIMPORT SymKind = obj.SDYNIMPORT 74 SHOSTOBJ SymKind = obj.SHOSTOBJ 75 ) 76 77 var symKindStrings = []string{ 78 SBSS: "SBSS", 79 SCONST: "SCONST", 80 SDATA: "SDATA", 81 SDYNIMPORT: "SDYNIMPORT", 82 SELFROSECT: "SELFROSECT", 83 SELFRXSECT: "SELFRXSECT", 84 SELFSECT: "SELFSECT", 85 SFILE: "SFILE", 86 SFILEPATH: "SFILEPATH", 87 SFUNCTAB: "SFUNCTAB", 88 SGOFUNC: "SGOFUNC", 89 SGOSTRING: "SGOSTRING", 90 SHOSTOBJ: "SHOSTOBJ", 91 SINITARR: "SINITARR", 92 SMACHO: "SMACHO", 93 SMACHOGOT: "SMACHOGOT", 94 SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT", 95 SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT", 96 SMACHOPLT: "SMACHOPLT", 97 SMACHOSYMSTR: "SMACHOSYMSTR", 98 SMACHOSYMTAB: "SMACHOSYMTAB", 99 SNOPTRBSS: "SNOPTRBSS", 100 SNOPTRDATA: "SNOPTRDATA", 101 SPCLNTAB: "SPCLNTAB", 102 SRODATA: "SRODATA", 103 SSTRING: "SSTRING", 104 SSYMTAB: "SSYMTAB", 105 STEXT: "STEXT", 106 STLSBSS: "STLSBSS", 107 STYPE: "STYPE", 108 STYPELINK: "STYPELINK", 109 SWINDOWS: "SWINDOWS", 110 SXREF: "SXREF", 111 } 112 113 func (k SymKind) String() string { 114 if k < 0 || int(k) >= len(symKindStrings) { 115 return fmt.Sprintf("SymKind(%d)", k) 116 } 117 return symKindStrings[k] 118 } 119 120 // A Sym is a named symbol in an object file. 121 type Sym struct { 122 SymID // symbol identifier (name and version) 123 Kind SymKind // kind of symbol 124 DupOK bool // are duplicate definitions okay? 125 Size int // size of corresponding data 126 Type SymID // symbol for Go type information 127 Data Data // memory image of symbol 128 Reloc []Reloc // relocations to apply to Data 129 Func *Func // additional data for functions 130 } 131 132 // A SymID - the combination of Name and Version - uniquely identifies 133 // a symbol within a package. 
type SymID struct {
	// Name is the name of a symbol.
	Name string

	// Version is zero for symbols with global visibility.
	// Symbols with only file visibility (such as file-level static
	// declarations in C) have a non-zero version distinguishing
	// a symbol in one file from a symbol of the same name
	// in another file.
	Version int
}

// String returns the bare name for a version-0 symbol and
// "name<version>" for any other version.
func (s SymID) String() string {
	if s.Version != 0 {
		return fmt.Sprintf("%s<%d>", s.Name, s.Version)
	}
	return s.Name
}

// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	Offset int64
	Size   int64
}

// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the memory image
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int
	Size   int
	Sym    SymID
	Add    int

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type int
}

// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int    // TODO(rsc): Define meaning.
	Offset int    // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}

// Func contains additional per-symbol information specific to functions.
192 type Func struct { 193 Args int // size in bytes of argument frame: inputs and outputs 194 Frame int // size in bytes of local variable frame 195 Leaf bool // function omits save of link register (ARM) 196 NoSplit bool // function omits stack split prologue 197 Var []Var // detail about local variables 198 PCSP Data // PC SP offset map 199 PCFile Data // PC file number map (index into File) 200 PCLine Data // PC line number map 201 PCData []Data // PC runtime support data map 202 FuncData []FuncData // non-PC-specific runtime support data 203 File []string // paths indexed by PCFile 204 } 205 206 // TODO: Add PCData []byte and PCDataIter (similar to liblink). 207 208 // A FuncData is a single function-specific data value. 209 type FuncData struct { 210 Sym SymID // symbol holding data 211 Offset int64 // offset into symbol for funcdata pointer 212 } 213 214 // A Package is a parsed Go object file or archive defining a Go package. 215 type Package struct { 216 ImportPath string // import path denoting this package 217 Imports []string // packages imported by this package 218 Syms []*Sym // symbols defined by this package 219 MaxVersion int // maximum Version in any SymID in Syms 220 } 221 222 var ( 223 archiveHeader = []byte("!<arch>\n") 224 archiveMagic = []byte("`\n") 225 goobjHeader = []byte("go objec") // truncated to size of archiveHeader 226 227 errCorruptArchive = errors.New("corrupt archive") 228 errTruncatedArchive = errors.New("truncated archive") 229 errNotArchive = errors.New("unrecognized archive format") 230 231 errCorruptObject = errors.New("corrupt object file") 232 errTruncatedObject = errors.New("truncated object file") 233 errNotObject = errors.New("unrecognized object file format") 234 ) 235 236 // An objReader is an object file reader. 
237 type objReader struct { 238 p *Package 239 b *bufio.Reader 240 f io.ReadSeeker 241 err error 242 offset int64 243 limit int64 244 tmp [256]byte 245 pkg string 246 pkgprefix string 247 } 248 249 // importPathToPrefix returns the prefix that will be used in the 250 // final symbol table for the given import path. 251 // We escape '%', '"', all control characters and non-ASCII bytes, 252 // and any '.' after the final slash. 253 // 254 // See ../../../cmd/ld/lib.c:/^pathtoprefix and 255 // ../../../cmd/gc/subr.c:/^pathtoprefix. 256 func importPathToPrefix(s string) string { 257 // find index of last slash, if any, or else -1. 258 // used for determining whether an index is after the last slash. 259 slash := strings.LastIndex(s, "/") 260 261 // check for chars that need escaping 262 n := 0 263 for r := 0; r < len(s); r++ { 264 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { 265 n++ 266 } 267 } 268 269 // quick exit 270 if n == 0 { 271 return s 272 } 273 274 // escape 275 const hex = "0123456789abcdef" 276 p := make([]byte, 0, len(s)+2*n) 277 for r := 0; r < len(s); r++ { 278 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { 279 p = append(p, '%', hex[c>>4], hex[c&0xF]) 280 } else { 281 p = append(p, c) 282 } 283 } 284 285 return string(p) 286 } 287 288 // init initializes r to read package p from f. 289 func (r *objReader) init(f io.ReadSeeker, p *Package) { 290 r.f = f 291 r.p = p 292 r.offset, _ = f.Seek(0, 1) 293 r.limit, _ = f.Seek(0, 2) 294 f.Seek(r.offset, 0) 295 r.b = bufio.NewReader(f) 296 r.pkgprefix = importPathToPrefix(p.ImportPath) + "." 297 } 298 299 // error records that an error occurred. 300 // It returns only the first error, so that an error 301 // caused by an earlier error does not discard information 302 // about the earlier error. 
303 func (r *objReader) error(err error) error { 304 if r.err == nil { 305 if err == io.EOF { 306 err = io.ErrUnexpectedEOF 307 } 308 r.err = err 309 } 310 // panic("corrupt") // useful for debugging 311 return r.err 312 } 313 314 // readByte reads and returns a byte from the input file. 315 // On I/O error or EOF, it records the error but returns byte 0. 316 // A sequence of 0 bytes will eventually terminate any 317 // parsing state in the object file. In particular, it ends the 318 // reading of a varint. 319 func (r *objReader) readByte() byte { 320 if r.err != nil { 321 return 0 322 } 323 if r.offset >= r.limit { 324 r.error(io.ErrUnexpectedEOF) 325 return 0 326 } 327 b, err := r.b.ReadByte() 328 if err != nil { 329 if err == io.EOF { 330 err = io.ErrUnexpectedEOF 331 } 332 r.error(err) 333 b = 0 334 } else { 335 r.offset++ 336 } 337 return b 338 } 339 340 // read reads exactly len(b) bytes from the input file. 341 // If an error occurs, read returns the error but also 342 // records it, so it is safe for callers to ignore the result 343 // as long as delaying the report is not a problem. 344 func (r *objReader) readFull(b []byte) error { 345 if r.err != nil { 346 return r.err 347 } 348 if r.offset+int64(len(b)) > r.limit { 349 return r.error(io.ErrUnexpectedEOF) 350 } 351 n, err := io.ReadFull(r.b, b) 352 r.offset += int64(n) 353 if err != nil { 354 return r.error(err) 355 } 356 return nil 357 } 358 359 // readInt reads a zigzag varint from the input file. 360 func (r *objReader) readInt() int { 361 var u uint64 362 363 for shift := uint(0); ; shift += 7 { 364 if shift >= 64 { 365 r.error(errCorruptObject) 366 return 0 367 } 368 c := r.readByte() 369 u |= uint64(c&0x7F) << shift 370 if c&0x80 == 0 { 371 break 372 } 373 } 374 375 v := int64(u>>1) ^ (int64(u) << 63 >> 63) 376 if int64(int(v)) != v { 377 r.error(errCorruptObject) // TODO 378 return 0 379 } 380 return int(v) 381 } 382 383 // readString reads a length-delimited string from the input file. 
384 func (r *objReader) readString() string { 385 n := r.readInt() 386 buf := make([]byte, n) 387 r.readFull(buf) 388 return string(buf) 389 } 390 391 // readSymID reads a SymID from the input file. 392 func (r *objReader) readSymID() SymID { 393 name, vers := r.readString(), r.readInt() 394 395 // In a symbol name in an object file, "". denotes the 396 // prefix for the package in which the object file has been found. 397 // Expand it. 398 name = strings.Replace(name, `"".`, r.pkgprefix, -1) 399 400 // An individual object file only records version 0 (extern) or 1 (static). 401 // To make static symbols unique across all files being read, we 402 // replace version 1 with the version corresponding to the current 403 // file number. The number is incremented on each call to parseObject. 404 if vers != 0 { 405 vers = r.p.MaxVersion 406 } 407 408 return SymID{name, vers} 409 } 410 411 // readData reads a data reference from the input file. 412 func (r *objReader) readData() Data { 413 n := r.readInt() 414 d := Data{Offset: r.offset, Size: int64(n)} 415 r.skip(int64(n)) 416 return d 417 } 418 419 // skip skips n bytes in the input. 420 func (r *objReader) skip(n int64) { 421 if n < 0 { 422 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) 423 } 424 if n < int64(len(r.tmp)) { 425 // Since the data is so small, a just reading from the buffered 426 // reader is better than flushing the buffer and seeking. 427 r.readFull(r.tmp[:n]) 428 } else if n <= int64(r.b.Buffered()) { 429 // Even though the data is not small, it has already been read. 430 // Advance the buffer instead of seeking. 431 for n > int64(len(r.tmp)) { 432 r.readFull(r.tmp[:]) 433 n -= int64(len(r.tmp)) 434 } 435 r.readFull(r.tmp[:n]) 436 } else { 437 // Seek, giving up buffered data. 
438 _, err := r.f.Seek(r.offset+n, 0) 439 if err != nil { 440 r.error(err) 441 } 442 r.offset += n 443 r.b.Reset(r.f) 444 } 445 } 446 447 // Parse parses an object file or archive from r, 448 // assuming that its import path is pkgpath. 449 func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) { 450 if pkgpath == "" { 451 pkgpath = `""` 452 } 453 p := new(Package) 454 p.ImportPath = pkgpath 455 456 var rd objReader 457 rd.init(r, p) 458 err := rd.readFull(rd.tmp[:8]) 459 if err != nil { 460 if err == io.EOF { 461 err = io.ErrUnexpectedEOF 462 } 463 return nil, err 464 } 465 466 switch { 467 default: 468 return nil, errNotObject 469 470 case bytes.Equal(rd.tmp[:8], archiveHeader): 471 if err := rd.parseArchive(); err != nil { 472 return nil, err 473 } 474 case bytes.Equal(rd.tmp[:8], goobjHeader): 475 if err := rd.parseObject(goobjHeader); err != nil { 476 return nil, err 477 } 478 } 479 480 return p, nil 481 } 482 483 // trimSpace removes trailing spaces from b and returns the corresponding string. 484 // This effectively parses the form used in archive headers. 485 func trimSpace(b []byte) string { 486 return string(bytes.TrimRight(b, " ")) 487 } 488 489 // parseArchive parses a Unix archive of Go object files. 490 // TODO(rsc): Need to skip non-Go object files. 491 // TODO(rsc): Maybe record table of contents in r.p so that 492 // linker can avoid having code to parse archives too. 493 func (r *objReader) parseArchive() error { 494 for r.offset < r.limit { 495 if err := r.readFull(r.tmp[:60]); err != nil { 496 return err 497 } 498 data := r.tmp[:60] 499 500 // Each file is preceded by this text header (slice indices in first column): 501 // 0:16 name 502 // 16:28 date 503 // 28:34 uid 504 // 34:40 gid 505 // 40:48 mode 506 // 48:58 size 507 // 58:60 magic - `\n 508 // We only care about name, size, and magic. 509 // The fields are space-padded on the right. 510 // The size is in decimal. 511 // The file data - size bytes - follows the header. 
512 // Headers are 2-byte aligned, so if size is odd, an extra padding 513 // byte sits between the file data and the next header. 514 // The file data that follows is padded to an even number of bytes: 515 // if size is odd, an extra padding byte is inserted betw the next header. 516 if len(data) < 60 { 517 return errTruncatedArchive 518 } 519 if !bytes.Equal(data[58:60], archiveMagic) { 520 return errCorruptArchive 521 } 522 name := trimSpace(data[0:16]) 523 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) 524 if err != nil { 525 return errCorruptArchive 526 } 527 data = data[60:] 528 fsize := size + size&1 529 if fsize < 0 || fsize < size { 530 return errCorruptArchive 531 } 532 switch name { 533 case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF": 534 r.skip(size) 535 default: 536 oldLimit := r.limit 537 r.limit = r.offset + size 538 if err := r.parseObject(nil); err != nil { 539 return fmt.Errorf("parsing archive member %q: %v", name, err) 540 } 541 r.skip(r.limit - r.offset) 542 r.limit = oldLimit 543 } 544 if size&1 != 0 { 545 r.skip(1) 546 } 547 } 548 return nil 549 } 550 551 // parseObject parses a single Go object file. 552 // The prefix is the bytes already read from the file, 553 // typically in order to detect that this is an object file. 554 // The object file consists of a textual header ending in "\n!\n" 555 // and then the part we want to parse begins. 556 // The format of that part is defined in a comment at the top 557 // of src/liblink/objfile.c. 558 func (r *objReader) parseObject(prefix []byte) error { 559 // TODO(rsc): Maybe use prefix and the initial input to 560 // record the header line from the file, which would 561 // give the architecture and other version information. 562 563 r.p.MaxVersion++ 564 var c1, c2, c3 byte 565 for { 566 c1, c2, c3 = c2, c3, r.readByte() 567 if c3 == 0 { // NUL or EOF, either is bad 568 return errCorruptObject 569 } 570 if c1 == '\n' && c2 == '!' 
&& c3 == '\n' { 571 break 572 } 573 } 574 575 r.readFull(r.tmp[:8]) 576 if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go13ld")) { 577 return r.error(errCorruptObject) 578 } 579 580 b := r.readByte() 581 if b != 1 { 582 return r.error(errCorruptObject) 583 } 584 585 // Direct package dependencies. 586 for { 587 s := r.readString() 588 if s == "" { 589 break 590 } 591 r.p.Imports = append(r.p.Imports, s) 592 } 593 594 // Symbols. 595 for { 596 if b := r.readByte(); b != 0xfe { 597 if b != 0xff { 598 return r.error(errCorruptObject) 599 } 600 break 601 } 602 603 typ := r.readInt() 604 s := &Sym{SymID: r.readSymID()} 605 r.p.Syms = append(r.p.Syms, s) 606 s.Kind = SymKind(typ) 607 flags := r.readInt() 608 s.DupOK = flags&1 != 0 609 s.Size = r.readInt() 610 s.Type = r.readSymID() 611 s.Data = r.readData() 612 s.Reloc = make([]Reloc, r.readInt()) 613 for i := range s.Reloc { 614 rel := &s.Reloc[i] 615 rel.Offset = r.readInt() 616 rel.Size = r.readInt() 617 rel.Type = r.readInt() 618 rel.Add = r.readInt() 619 r.readInt() // Xadd - ignored 620 rel.Sym = r.readSymID() 621 r.readSymID() // Xsym - ignored 622 } 623 624 if s.Kind == STEXT { 625 f := new(Func) 626 s.Func = f 627 f.Args = r.readInt() 628 f.Frame = r.readInt() 629 flags := r.readInt() 630 f.Leaf = flags&1 != 0 631 f.NoSplit = r.readInt() != 0 632 f.Var = make([]Var, r.readInt()) 633 for i := range f.Var { 634 v := &f.Var[i] 635 v.Name = r.readSymID().Name 636 v.Offset = r.readInt() 637 v.Kind = r.readInt() 638 v.Type = r.readSymID() 639 } 640 641 f.PCSP = r.readData() 642 f.PCFile = r.readData() 643 f.PCLine = r.readData() 644 f.PCData = make([]Data, r.readInt()) 645 for i := range f.PCData { 646 f.PCData[i] = r.readData() 647 } 648 f.FuncData = make([]FuncData, r.readInt()) 649 for i := range f.FuncData { 650 f.FuncData[i].Sym = r.readSymID() 651 } 652 for i := range f.FuncData { 653 f.FuncData[i].Offset = int64(r.readInt()) // TODO 654 } 655 f.File = make([]string, r.readInt()) 656 for i := range f.File { 657 
f.File[i] = r.readSymID().Name 658 } 659 } 660 } 661 662 r.readFull(r.tmp[:7]) 663 if !bytes.Equal(r.tmp[:7], []byte("\xffgo13ld")) { 664 return r.error(errCorruptObject) 665 } 666 667 return nil 668 } 669