Home | History | Annotate | Download | only in macho
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package macho implements access to Mach-O object files.
      6 package macho
      7 
      8 // High level access to low level data structures.
      9 
     10 import (
     11 	"bytes"
     12 	"debug/dwarf"
     13 	"encoding/binary"
     14 	"fmt"
     15 	"io"
     16 	"os"
     17 )
     18 
// A File represents an open Mach-O file.
// It is produced by NewFile (or by Open, which wraps NewFile).
type File struct {
	FileHeader
	// ByteOrder is the byte order NewFile selected from the magic number.
	ByteOrder binary.ByteOrder
	// Loads holds one entry per load command, in the order NewFile decoded them.
	Loads []Load
	// Sections collects the sections of every segment command, in the
	// order their headers were decoded.
	Sections []*Section

	// Symtab is set when NewFile decodes a LoadCmdSymtab command; otherwise nil.
	Symtab *Symtab
	// Dysymtab is set when NewFile decodes a LoadCmdDysymtab command; otherwise nil.
	Dysymtab *Dysymtab

	// closer is set by Open so that Close can release the underlying file.
	// It stays nil for Files created directly with NewFile.
	closer io.Closer
}
     31 
// A Load represents any Mach-O load command.
// Every concrete load command type in this file satisfies it by
// embedding LoadBytes.
type Load interface {
	// Raw returns the bytes of the load command.
	Raw() []byte
}
     36 
     37 // A LoadBytes is the uninterpreted bytes of a Mach-O load command.
     38 type LoadBytes []byte
     39 
     40 func (b LoadBytes) Raw() []byte { return b }
     41 
// A SegmentHeader is the header for a Mach-O 32-bit or 64-bit load segment command.
// NewFile fills it from the decoded Segment32 or Segment64 structure,
// widening the 32-bit address, size and offset fields to uint64.
type SegmentHeader struct {
	Cmd     LoadCmd // load command type (LoadCmdSegment or LoadCmdSegment64)
	Len     uint32  // size in bytes of the whole load command
	Name    string  // segment name, cut at the first NUL byte
	Addr    uint64  // Addr field of the segment command
	Memsz   uint64  // Memsz field of the segment command
	Offset  uint64  // file offset of the segment's data
	Filesz  uint64  // number of bytes of segment data in the file
	Maxprot uint32  // Maxprot field of the segment command
	Prot    uint32  // Prot field of the segment command
	Nsect   uint32  // number of section headers that follow the segment header
	Flag    uint32  // Flag field of the segment command
}
     56 
// A Segment represents a Mach-O 32-bit or 64-bit load segment command.
type Segment struct {
	// LoadBytes holds the raw bytes of the load command.
	LoadBytes
	// SegmentHeader holds the decoded header fields.
	SegmentHeader

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	// sr covers Filesz bytes starting at Offset in the underlying reader;
	// NewFile installs it and also exposes it through ReaderAt.
	sr *io.SectionReader
}
     71 
     72 // Data reads and returns the contents of the segment.
     73 func (s *Segment) Data() ([]byte, error) {
     74 	dat := make([]byte, s.sr.Size())
     75 	n, err := s.sr.ReadAt(dat, 0)
     76 	if n == len(dat) {
     77 		err = nil
     78 	}
     79 	return dat[0:n], err
     80 }
     81 
     82 // Open returns a new ReadSeeker reading the segment.
     83 func (s *Segment) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
     84 
// A SectionHeader is the header for a Mach-O section.
// NewFile fills it from the decoded Section32 or Section64 structure that
// follows a segment command, widening 32-bit Addr and Size to uint64.
type SectionHeader struct {
	Name   string // section name, cut at the first NUL byte
	Seg    string // Seg field of the section header, cut at the first NUL byte
	Addr   uint64 // Addr field of the section header
	Size   uint64 // number of bytes of section data
	Offset uint32 // file offset of the section's data
	Align  uint32 // Align field of the section header
	Reloff uint32 // Reloff field of the section header
	Nreloc uint32 // Nreloc field of the section header
	Flags  uint32 // Flags field of the section header
}
     96 
// A Section represents a single Mach-O section: its decoded header plus
// a reader over the section's bytes in the underlying file.
type Section struct {
	SectionHeader

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	// sr covers Size bytes starting at Offset in the underlying reader;
	// pushSection installs it and also exposes it through ReaderAt.
	sr *io.SectionReader
}
    109 
    110 // Data reads and returns the contents of the Mach-O section.
    111 func (s *Section) Data() ([]byte, error) {
    112 	dat := make([]byte, s.sr.Size())
    113 	n, err := s.sr.ReadAt(dat, 0)
    114 	if n == len(dat) {
    115 		err = nil
    116 	}
    117 	return dat[0:n], err
    118 }
    119 
    120 // Open returns a new ReadSeeker reading the Mach-O section.
    121 func (s *Section) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
    122 
// A Dylib represents a Mach-O load dynamic library command.
type Dylib struct {
	// LoadBytes holds the raw bytes of the load command.
	LoadBytes
	Name           string // NUL-terminated string found at the header's Name offset inside the command
	Time           uint32 // Time field of the DylibCmd header
	CurrentVersion uint32 // CurrentVersion field of the DylibCmd header
	CompatVersion  uint32 // CompatVersion field of the DylibCmd header
}
    131 
// A Symtab represents a Mach-O symbol table command.
type Symtab struct {
	// LoadBytes holds the raw bytes of the load command.
	LoadBytes
	SymtabCmd
	// Syms holds the decoded symbols, one per on-disk symbol table entry,
	// with names resolved through the string table.
	Syms []Symbol
}
    138 
// A Dysymtab represents a Mach-O dynamic symbol table command.
type Dysymtab struct {
	// LoadBytes holds the raw bytes of the load command.
	LoadBytes
	// DysymtabCmd is the decoded command header.
	DysymtabCmd
	IndirectSyms []uint32 // indices into Symtab.Syms
}
    145 
    146 /*
    147  * Mach-O reader
    148  */
    149 
    150 // FormatError is returned by some operations if the data does
    151 // not have the correct format for an object file.
    152 type FormatError struct {
    153 	off int64
    154 	msg string
    155 	val interface{}
    156 }
    157 
    158 func (e *FormatError) Error() string {
    159 	msg := e.msg
    160 	if e.val != nil {
    161 		msg += fmt.Sprintf(" '%v'", e.val)
    162 	}
    163 	msg += fmt.Sprintf(" in record at byte %#x", e.off)
    164 	return msg
    165 }
    166 
    167 // Open opens the named file using os.Open and prepares it for use as a Mach-O binary.
    168 func Open(name string) (*File, error) {
    169 	f, err := os.Open(name)
    170 	if err != nil {
    171 		return nil, err
    172 	}
    173 	ff, err := NewFile(f)
    174 	if err != nil {
    175 		f.Close()
    176 		return nil, err
    177 	}
    178 	ff.closer = f
    179 	return ff, nil
    180 }
    181 
    182 // Close closes the File.
    183 // If the File was created using NewFile directly instead of Open,
    184 // Close has no effect.
    185 func (f *File) Close() error {
    186 	var err error
    187 	if f.closer != nil {
    188 		err = f.closer.Close()
    189 		f.closer = nil
    190 	}
    191 	return err
    192 }
    193 
    194 // NewFile creates a new File for accessing a Mach-O binary in an underlying reader.
    195 // The Mach-O binary is expected to start at position 0 in the ReaderAt.
    196 func NewFile(r io.ReaderAt) (*File, error) {
    197 	f := new(File)
    198 	sr := io.NewSectionReader(r, 0, 1<<63-1)
    199 
    200 	// Read and decode Mach magic to determine byte order, size.
    201 	// Magic32 and Magic64 differ only in the bottom bit.
    202 	var ident [4]byte
    203 	if _, err := r.ReadAt(ident[0:], 0); err != nil {
    204 		return nil, err
    205 	}
    206 	be := binary.BigEndian.Uint32(ident[0:])
    207 	le := binary.LittleEndian.Uint32(ident[0:])
    208 	switch Magic32 &^ 1 {
    209 	case be &^ 1:
    210 		f.ByteOrder = binary.BigEndian
    211 		f.Magic = be
    212 	case le &^ 1:
    213 		f.ByteOrder = binary.LittleEndian
    214 		f.Magic = le
    215 	default:
    216 		return nil, &FormatError{0, "invalid magic number", nil}
    217 	}
    218 
    219 	// Read entire file header.
    220 	if err := binary.Read(sr, f.ByteOrder, &f.FileHeader); err != nil {
    221 		return nil, err
    222 	}
    223 
    224 	// Then load commands.
    225 	offset := int64(fileHeaderSize32)
    226 	if f.Magic == Magic64 {
    227 		offset = fileHeaderSize64
    228 	}
    229 	dat := make([]byte, f.Cmdsz)
    230 	if _, err := r.ReadAt(dat, offset); err != nil {
    231 		return nil, err
    232 	}
    233 	f.Loads = make([]Load, f.Ncmd)
    234 	bo := f.ByteOrder
    235 	for i := range f.Loads {
    236 		// Each load command begins with uint32 command and length.
    237 		if len(dat) < 8 {
    238 			return nil, &FormatError{offset, "command block too small", nil}
    239 		}
    240 		cmd, siz := LoadCmd(bo.Uint32(dat[0:4])), bo.Uint32(dat[4:8])
    241 		if siz < 8 || siz > uint32(len(dat)) {
    242 			return nil, &FormatError{offset, "invalid command block size", nil}
    243 		}
    244 		var cmddat []byte
    245 		cmddat, dat = dat[0:siz], dat[siz:]
    246 		offset += int64(siz)
    247 		var s *Segment
    248 		switch cmd {
    249 		default:
    250 			f.Loads[i] = LoadBytes(cmddat)
    251 
    252 		case LoadCmdDylib:
    253 			var hdr DylibCmd
    254 			b := bytes.NewReader(cmddat)
    255 			if err := binary.Read(b, bo, &hdr); err != nil {
    256 				return nil, err
    257 			}
    258 			l := new(Dylib)
    259 			if hdr.Name >= uint32(len(cmddat)) {
    260 				return nil, &FormatError{offset, "invalid name in dynamic library command", hdr.Name}
    261 			}
    262 			l.Name = cstring(cmddat[hdr.Name:])
    263 			l.Time = hdr.Time
    264 			l.CurrentVersion = hdr.CurrentVersion
    265 			l.CompatVersion = hdr.CompatVersion
    266 			l.LoadBytes = LoadBytes(cmddat)
    267 			f.Loads[i] = l
    268 
    269 		case LoadCmdSymtab:
    270 			var hdr SymtabCmd
    271 			b := bytes.NewReader(cmddat)
    272 			if err := binary.Read(b, bo, &hdr); err != nil {
    273 				return nil, err
    274 			}
    275 			strtab := make([]byte, hdr.Strsize)
    276 			if _, err := r.ReadAt(strtab, int64(hdr.Stroff)); err != nil {
    277 				return nil, err
    278 			}
    279 			var symsz int
    280 			if f.Magic == Magic64 {
    281 				symsz = 16
    282 			} else {
    283 				symsz = 12
    284 			}
    285 			symdat := make([]byte, int(hdr.Nsyms)*symsz)
    286 			if _, err := r.ReadAt(symdat, int64(hdr.Symoff)); err != nil {
    287 				return nil, err
    288 			}
    289 			st, err := f.parseSymtab(symdat, strtab, cmddat, &hdr, offset)
    290 			if err != nil {
    291 				return nil, err
    292 			}
    293 			f.Loads[i] = st
    294 			f.Symtab = st
    295 
    296 		case LoadCmdDysymtab:
    297 			var hdr DysymtabCmd
    298 			b := bytes.NewReader(cmddat)
    299 			if err := binary.Read(b, bo, &hdr); err != nil {
    300 				return nil, err
    301 			}
    302 			dat := make([]byte, hdr.Nindirectsyms*4)
    303 			if _, err := r.ReadAt(dat, int64(hdr.Indirectsymoff)); err != nil {
    304 				return nil, err
    305 			}
    306 			x := make([]uint32, hdr.Nindirectsyms)
    307 			if err := binary.Read(bytes.NewReader(dat), bo, x); err != nil {
    308 				return nil, err
    309 			}
    310 			st := new(Dysymtab)
    311 			st.LoadBytes = LoadBytes(cmddat)
    312 			st.DysymtabCmd = hdr
    313 			st.IndirectSyms = x
    314 			f.Loads[i] = st
    315 			f.Dysymtab = st
    316 
    317 		case LoadCmdSegment:
    318 			var seg32 Segment32
    319 			b := bytes.NewReader(cmddat)
    320 			if err := binary.Read(b, bo, &seg32); err != nil {
    321 				return nil, err
    322 			}
    323 			s = new(Segment)
    324 			s.LoadBytes = cmddat
    325 			s.Cmd = cmd
    326 			s.Len = siz
    327 			s.Name = cstring(seg32.Name[0:])
    328 			s.Addr = uint64(seg32.Addr)
    329 			s.Memsz = uint64(seg32.Memsz)
    330 			s.Offset = uint64(seg32.Offset)
    331 			s.Filesz = uint64(seg32.Filesz)
    332 			s.Maxprot = seg32.Maxprot
    333 			s.Prot = seg32.Prot
    334 			s.Nsect = seg32.Nsect
    335 			s.Flag = seg32.Flag
    336 			f.Loads[i] = s
    337 			for i := 0; i < int(s.Nsect); i++ {
    338 				var sh32 Section32
    339 				if err := binary.Read(b, bo, &sh32); err != nil {
    340 					return nil, err
    341 				}
    342 				sh := new(Section)
    343 				sh.Name = cstring(sh32.Name[0:])
    344 				sh.Seg = cstring(sh32.Seg[0:])
    345 				sh.Addr = uint64(sh32.Addr)
    346 				sh.Size = uint64(sh32.Size)
    347 				sh.Offset = sh32.Offset
    348 				sh.Align = sh32.Align
    349 				sh.Reloff = sh32.Reloff
    350 				sh.Nreloc = sh32.Nreloc
    351 				sh.Flags = sh32.Flags
    352 				f.pushSection(sh, r)
    353 			}
    354 
    355 		case LoadCmdSegment64:
    356 			var seg64 Segment64
    357 			b := bytes.NewReader(cmddat)
    358 			if err := binary.Read(b, bo, &seg64); err != nil {
    359 				return nil, err
    360 			}
    361 			s = new(Segment)
    362 			s.LoadBytes = cmddat
    363 			s.Cmd = cmd
    364 			s.Len = siz
    365 			s.Name = cstring(seg64.Name[0:])
    366 			s.Addr = seg64.Addr
    367 			s.Memsz = seg64.Memsz
    368 			s.Offset = seg64.Offset
    369 			s.Filesz = seg64.Filesz
    370 			s.Maxprot = seg64.Maxprot
    371 			s.Prot = seg64.Prot
    372 			s.Nsect = seg64.Nsect
    373 			s.Flag = seg64.Flag
    374 			f.Loads[i] = s
    375 			for i := 0; i < int(s.Nsect); i++ {
    376 				var sh64 Section64
    377 				if err := binary.Read(b, bo, &sh64); err != nil {
    378 					return nil, err
    379 				}
    380 				sh := new(Section)
    381 				sh.Name = cstring(sh64.Name[0:])
    382 				sh.Seg = cstring(sh64.Seg[0:])
    383 				sh.Addr = sh64.Addr
    384 				sh.Size = sh64.Size
    385 				sh.Offset = sh64.Offset
    386 				sh.Align = sh64.Align
    387 				sh.Reloff = sh64.Reloff
    388 				sh.Nreloc = sh64.Nreloc
    389 				sh.Flags = sh64.Flags
    390 				f.pushSection(sh, r)
    391 			}
    392 		}
    393 		if s != nil {
    394 			s.sr = io.NewSectionReader(r, int64(s.Offset), int64(s.Filesz))
    395 			s.ReaderAt = s.sr
    396 		}
    397 	}
    398 	return f, nil
    399 }
    400 
    401 func (f *File) parseSymtab(symdat, strtab, cmddat []byte, hdr *SymtabCmd, offset int64) (*Symtab, error) {
    402 	bo := f.ByteOrder
    403 	symtab := make([]Symbol, hdr.Nsyms)
    404 	b := bytes.NewReader(symdat)
    405 	for i := range symtab {
    406 		var n Nlist64
    407 		if f.Magic == Magic64 {
    408 			if err := binary.Read(b, bo, &n); err != nil {
    409 				return nil, err
    410 			}
    411 		} else {
    412 			var n32 Nlist32
    413 			if err := binary.Read(b, bo, &n32); err != nil {
    414 				return nil, err
    415 			}
    416 			n.Name = n32.Name
    417 			n.Type = n32.Type
    418 			n.Sect = n32.Sect
    419 			n.Desc = n32.Desc
    420 			n.Value = uint64(n32.Value)
    421 		}
    422 		sym := &symtab[i]
    423 		if n.Name >= uint32(len(strtab)) {
    424 			return nil, &FormatError{offset, "invalid name in symbol table", n.Name}
    425 		}
    426 		sym.Name = cstring(strtab[n.Name:])
    427 		sym.Type = n.Type
    428 		sym.Sect = n.Sect
    429 		sym.Desc = n.Desc
    430 		sym.Value = n.Value
    431 	}
    432 	st := new(Symtab)
    433 	st.LoadBytes = LoadBytes(cmddat)
    434 	st.Syms = symtab
    435 	return st, nil
    436 }
    437 
    438 func (f *File) pushSection(sh *Section, r io.ReaderAt) {
    439 	f.Sections = append(f.Sections, sh)
    440 	sh.sr = io.NewSectionReader(r, int64(sh.Offset), int64(sh.Size))
    441 	sh.ReaderAt = sh.sr
    442 }
    443 
    444 func cstring(b []byte) string {
    445 	var i int
    446 	for i = 0; i < len(b) && b[i] != 0; i++ {
    447 	}
    448 	return string(b[0:i])
    449 }
    450 
    451 // Segment returns the first Segment with the given name, or nil if no such segment exists.
    452 func (f *File) Segment(name string) *Segment {
    453 	for _, l := range f.Loads {
    454 		if s, ok := l.(*Segment); ok && s.Name == name {
    455 			return s
    456 		}
    457 	}
    458 	return nil
    459 }
    460 
    461 // Section returns the first section with the given name, or nil if no such
    462 // section exists.
    463 func (f *File) Section(name string) *Section {
    464 	for _, s := range f.Sections {
    465 		if s.Name == name {
    466 			return s
    467 		}
    468 	}
    469 	return nil
    470 }
    471 
    472 // DWARF returns the DWARF debug information for the Mach-O file.
    473 func (f *File) DWARF() (*dwarf.Data, error) {
    474 	// There are many other DWARF sections, but these
    475 	// are the ones the debug/dwarf package uses.
    476 	// Don't bother loading others.
    477 	var names = [...]string{"abbrev", "info", "line", "ranges", "str"}
    478 	var dat [len(names)][]byte
    479 	for i, name := range names {
    480 		name = "__debug_" + name
    481 		s := f.Section(name)
    482 		if s == nil {
    483 			continue
    484 		}
    485 		b, err := s.Data()
    486 		if err != nil && uint64(len(b)) < s.Size {
    487 			return nil, err
    488 		}
    489 		dat[i] = b
    490 	}
    491 
    492 	abbrev, info, line, ranges, str := dat[0], dat[1], dat[2], dat[3], dat[4]
    493 	return dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
    494 }
    495 
    496 // ImportedSymbols returns the names of all symbols
    497 // referred to by the binary f that are expected to be
    498 // satisfied by other libraries at dynamic load time.
    499 func (f *File) ImportedSymbols() ([]string, error) {
    500 	if f.Dysymtab == nil || f.Symtab == nil {
    501 		return nil, &FormatError{0, "missing symbol table", nil}
    502 	}
    503 
    504 	st := f.Symtab
    505 	dt := f.Dysymtab
    506 	var all []string
    507 	for _, s := range st.Syms[dt.Iundefsym : dt.Iundefsym+dt.Nundefsym] {
    508 		all = append(all, s.Name)
    509 	}
    510 	return all, nil
    511 }
    512 
    513 // ImportedLibraries returns the paths of all libraries
    514 // referred to by the binary f that are expected to be
    515 // linked with the binary at dynamic link time.
    516 func (f *File) ImportedLibraries() ([]string, error) {
    517 	var all []string
    518 	for _, l := range f.Loads {
    519 		if lib, ok := l.(*Dylib); ok {
    520 			all = append(all, lib.Name)
    521 		}
    522 	}
    523 	return all, nil
    524 }
    525