Home | History | Annotate | Download | only in dwarf
      1 // Copyright 2009 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // DWARF debug information entry parser.
      6 // An entry is a sequence of data items of a given format.
      7 // The first word in the entry is an index into what DWARF
      8 // calls the ``abbreviation table.''  An abbreviation is really
      9 // just a type descriptor: it's an array of attribute tag/value format pairs.
     10 
     11 package dwarf
     12 
     13 import (
     14 	"errors"
     15 	"strconv"
     16 )
     17 
// abbrev is a single entry's description: the entry's tag, whether
// the entry has children, and the ordered sequence of attributes
// that every entry using this abbreviation carries.
type abbrev struct {
	tag      Tag      // tag shared by all entries using this abbreviation
	children bool     // true when the children byte in the table is non-zero
	field    []afield // attribute descriptions, in encoding order
}
     24 
// afield describes one attribute of an abbreviation.
type afield struct {
	attr  Attr   // attribute name
	fmt   format // form used to encode the attribute's value
	class Class  // class computed by formToClass when the table is parsed
}
     30 
// abbrevTable is a map from entry format ids (the abbreviation code
// read at the start of each entry) to their descriptions.
type abbrevTable map[uint32]abbrev
     33 
     34 // ParseAbbrev returns the abbreviation table that starts at byte off
     35 // in the .debug_abbrev section.
     36 func (d *Data) parseAbbrev(off uint32, vers int) (abbrevTable, error) {
     37 	if m, ok := d.abbrevCache[off]; ok {
     38 		return m, nil
     39 	}
     40 
     41 	data := d.abbrev
     42 	if off > uint32(len(data)) {
     43 		data = nil
     44 	} else {
     45 		data = data[off:]
     46 	}
     47 	b := makeBuf(d, unknownFormat{}, "abbrev", 0, data)
     48 
     49 	// Error handling is simplified by the buf getters
     50 	// returning an endless stream of 0s after an error.
     51 	m := make(abbrevTable)
     52 	for {
     53 		// Table ends with id == 0.
     54 		id := uint32(b.uint())
     55 		if id == 0 {
     56 			break
     57 		}
     58 
     59 		// Walk over attributes, counting.
     60 		n := 0
     61 		b1 := b // Read from copy of b.
     62 		b1.uint()
     63 		b1.uint8()
     64 		for {
     65 			tag := b1.uint()
     66 			fmt := b1.uint()
     67 			if tag == 0 && fmt == 0 {
     68 				break
     69 			}
     70 			n++
     71 		}
     72 		if b1.err != nil {
     73 			return nil, b1.err
     74 		}
     75 
     76 		// Walk over attributes again, this time writing them down.
     77 		var a abbrev
     78 		a.tag = Tag(b.uint())
     79 		a.children = b.uint8() != 0
     80 		a.field = make([]afield, n)
     81 		for i := range a.field {
     82 			a.field[i].attr = Attr(b.uint())
     83 			a.field[i].fmt = format(b.uint())
     84 			a.field[i].class = formToClass(a.field[i].fmt, a.field[i].attr, vers, &b)
     85 		}
     86 		b.uint()
     87 		b.uint()
     88 
     89 		m[id] = a
     90 	}
     91 	if b.err != nil {
     92 		return nil, b.err
     93 	}
     94 	d.abbrevCache[off] = m
     95 	return m, nil
     96 }
     97 
// attrIsExprloc indicates attributes that allow exprloc values that
// are encoded as block values in DWARF 2 and 3. See DWARF 4, Figure
// 20. formToClass consults it to report block-encoded values of these
// attributes as ClassExprLoc instead of ClassBlock.
var attrIsExprloc = map[Attr]bool{
	AttrLocation:      true,
	AttrByteSize:      true,
	AttrBitOffset:     true,
	AttrBitSize:       true,
	AttrStringLength:  true,
	AttrLowerBound:    true,
	AttrReturnAddr:    true,
	AttrStrideSize:    true,
	AttrUpperBound:    true,
	AttrCount:         true,
	AttrDataMemberLoc: true,
	AttrFrameBase:     true,
	AttrSegment:       true,
	AttrStaticLink:    true,
	AttrUseLocation:   true,
	AttrVtableElemLoc: true,
	AttrAllocated:     true,
	AttrAssociated:    true,
	AttrDataLocation:  true,
	AttrStride:        true,
}
    123 
// attrPtrClass indicates the *ptr class of attributes that have
// encoding formSecOffset in DWARF 4 or formData* in DWARF 2 and 3.
// Attributes absent from the map have no *ptr class.
var attrPtrClass = map[Attr]Class{
	AttrLocation:      ClassLocListPtr,
	AttrStmtList:      ClassLinePtr,
	AttrStringLength:  ClassLocListPtr,
	AttrReturnAddr:    ClassLocListPtr,
	AttrStartScope:    ClassRangeListPtr,
	AttrDataMemberLoc: ClassLocListPtr,
	AttrFrameBase:     ClassLocListPtr,
	AttrMacroInfo:     ClassMacPtr,
	AttrSegment:       ClassLocListPtr,
	AttrStaticLink:    ClassLocListPtr,
	AttrUseLocation:   ClassLocListPtr,
	AttrVtableElemLoc: ClassLocListPtr,
	AttrRanges:        ClassRangeListPtr,
}
    141 
    142 // formToClass returns the DWARF 4 Class for the given form. If the
    143 // DWARF version is less then 4, it will disambiguate some forms
    144 // depending on the attribute.
    145 func formToClass(form format, attr Attr, vers int, b *buf) Class {
    146 	switch form {
    147 	default:
    148 		b.error("cannot determine class of unknown attribute form")
    149 		return 0
    150 
    151 	case formAddr:
    152 		return ClassAddress
    153 
    154 	case formDwarfBlock1, formDwarfBlock2, formDwarfBlock4, formDwarfBlock:
    155 		// In DWARF 2 and 3, ClassExprLoc was encoded as a
    156 		// block. DWARF 4 distinguishes ClassBlock and
    157 		// ClassExprLoc, but there are no attributes that can
    158 		// be both, so we also promote ClassBlock values in
    159 		// DWARF 4 that should be ClassExprLoc in case
    160 		// producers get this wrong.
    161 		if attrIsExprloc[attr] {
    162 			return ClassExprLoc
    163 		}
    164 		return ClassBlock
    165 
    166 	case formData1, formData2, formData4, formData8, formSdata, formUdata:
    167 		// In DWARF 2 and 3, ClassPtr was encoded as a
    168 		// constant. Unlike ClassExprLoc/ClassBlock, some
    169 		// DWARF 4 attributes need to distinguish Class*Ptr
    170 		// from ClassConstant, so we only do this promotion
    171 		// for versions 2 and 3.
    172 		if class, ok := attrPtrClass[attr]; vers < 4 && ok {
    173 			return class
    174 		}
    175 		return ClassConstant
    176 
    177 	case formFlag, formFlagPresent:
    178 		return ClassFlag
    179 
    180 	case formRefAddr, formRef1, formRef2, formRef4, formRef8, formRefUdata:
    181 		return ClassReference
    182 
    183 	case formRefSig8:
    184 		return ClassReferenceSig
    185 
    186 	case formString, formStrp:
    187 		return ClassString
    188 
    189 	case formSecOffset:
    190 		// DWARF 4 defines four *ptr classes, but doesn't
    191 		// distinguish them in the encoding. Disambiguate
    192 		// these classes using the attribute.
    193 		if class, ok := attrPtrClass[attr]; ok {
    194 			return class
    195 		}
    196 		b.error("cannot determine class of unknown attribute with formSecOffset")
    197 		return 0
    198 
    199 	case formExprloc:
    200 		return ClassExprLoc
    201 
    202 	case formGnuRefAlt:
    203 		return ClassReferenceAlt
    204 
    205 	case formGnuStrpAlt:
    206 		return ClassStringAlt
    207 	}
    208 }
    209 
// An Entry is a sequence of attribute/value pairs.
type Entry struct {
	Offset   Offset  // offset of Entry in DWARF info
	Tag      Tag     // tag (kind of Entry)
	Children bool    // whether Entry is followed by children
	Field    []Field // attribute/value pairs, in the order given by the abbreviation
}
    217 
// A Field is a single attribute/value pair in an Entry.
//
// A value can be one of several "attribute classes" defined by DWARF.
// The Go types corresponding to each class are:
//
//    DWARF class       Go type        Class
//    -----------       -------        -----
//    address           uint64         ClassAddress
//    block             []byte         ClassBlock
//    constant          int64          ClassConstant
//    flag              bool           ClassFlag
//    reference
//      to info         dwarf.Offset   ClassReference
//      to type unit    uint64         ClassReferenceSig
//      to alt info     int64          ClassReferenceAlt
//    string            string         ClassString
//    string (alt)      int64          ClassStringAlt
//    exprloc           []byte         ClassExprLoc
//    lineptr           int64          ClassLinePtr
//    loclistptr        int64          ClassLocListPtr
//    macptr            int64          ClassMacPtr
//    rangelistptr      int64          ClassRangeListPtr
type Field struct {
	Attr  Attr
	Val   interface{}
	Class Class
}
    243 
// A Class is the DWARF 4 class of an attribute value.
//
// In general, a given attribute's value may take on one of several
// possible classes defined by DWARF, each of which leads to a
// slightly different interpretation of the attribute.
//
// DWARF version 4 distinguishes attribute value classes more finely
// than previous versions of DWARF. The reader will disambiguate
// coarser classes from earlier versions of DWARF into the appropriate
// DWARF 4 class. For example, DWARF 2 uses "constant" for constants
// as well as all types of section offsets, but the reader will
// canonicalize attributes in DWARF 2 files that refer to section
// offsets to one of the Class*Ptr classes, even though these classes
// were only defined in DWARF 3.
type Class int
    259 
const (
	// ClassAddress represents values of type uint64 that are
	// addresses on the target machine.
	ClassAddress Class = 1 + iota

	// ClassBlock represents values of type []byte whose
	// interpretation depends on the attribute.
	ClassBlock

	// ClassConstant represents values of type int64 that are
	// constants. The interpretation of this constant depends on
	// the attribute.
	ClassConstant

	// ClassExprLoc represents values of type []byte that contain
	// an encoded DWARF expression or location description.
	ClassExprLoc

	// ClassFlag represents values of type bool.
	ClassFlag

	// ClassLinePtr represents values that are an int64 offset
	// into the "line" section.
	ClassLinePtr

	// ClassLocListPtr represents values that are an int64 offset
	// into the "loclist" section.
	ClassLocListPtr

	// ClassMacPtr represents values that are an int64 offset into
	// the "mac" section.
	ClassMacPtr

	// ClassRangeListPtr represents values that are an int64 offset
	// into the "rangelist" section.
	ClassRangeListPtr

	// ClassReference represents values that are an Offset offset
	// of an Entry in the info section (for use with Reader.Seek).
	// The DWARF specification combines ClassReference and
	// ClassReferenceSig into class "reference".
	ClassReference

	// ClassReferenceSig represents values that are a uint64 type
	// signature referencing a type Entry.
	ClassReferenceSig

	// ClassString represents values that are strings. If the
	// compilation unit specifies the AttrUseUTF8 flag (strongly
	// recommended), the string value will be encoded in UTF-8.
	// Otherwise, the encoding is unspecified.
	ClassString

	// ClassReferenceAlt represents values of type int64 that are
	// an offset into the DWARF "info" section of an alternate
	// object file.
	ClassReferenceAlt

	// ClassStringAlt represents values of type int64 that are an
	// offset into the DWARF string section of an alternate object
	// file.
	ClassStringAlt
)
    323 
    324 //go:generate stringer -type=Class
    325 
    326 func (i Class) GoString() string {
    327 	return "dwarf." + i.String()
    328 }
    329 
    330 // Val returns the value associated with attribute Attr in Entry,
    331 // or nil if there is no such attribute.
    332 //
    333 // A common idiom is to merge the check for nil return with
    334 // the check that the value has the expected dynamic type, as in:
    335 //	v, ok := e.Val(AttrSibling).(int64)
    336 //
    337 func (e *Entry) Val(a Attr) interface{} {
    338 	if f := e.AttrField(a); f != nil {
    339 		return f.Val
    340 	}
    341 	return nil
    342 }
    343 
    344 // AttrField returns the Field associated with attribute Attr in
    345 // Entry, or nil if there is no such attribute.
    346 func (e *Entry) AttrField(a Attr) *Field {
    347 	for i, f := range e.Field {
    348 		if f.Attr == a {
    349 			return &e.Field[i]
    350 		}
    351 	}
    352 	return nil
    353 }
    354 
// An Offset represents the location of an Entry within the DWARF info.
// It is the type of Entry.Offset and of the argument to Reader.Seek.
type Offset uint32
    358 
    359 // Entry reads a single entry from buf, decoding
    360 // according to the given abbreviation table.
    361 func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry {
    362 	off := b.off
    363 	id := uint32(b.uint())
    364 	if id == 0 {
    365 		return &Entry{}
    366 	}
    367 	a, ok := atab[id]
    368 	if !ok {
    369 		b.error("unknown abbreviation table index")
    370 		return nil
    371 	}
    372 	e := &Entry{
    373 		Offset:   off,
    374 		Tag:      a.tag,
    375 		Children: a.children,
    376 		Field:    make([]Field, len(a.field)),
    377 	}
    378 	for i := range e.Field {
    379 		e.Field[i].Attr = a.field[i].attr
    380 		e.Field[i].Class = a.field[i].class
    381 		fmt := a.field[i].fmt
    382 		if fmt == formIndirect {
    383 			fmt = format(b.uint())
    384 		}
    385 		var val interface{}
    386 		switch fmt {
    387 		default:
    388 			b.error("unknown entry attr format 0x" + strconv.FormatInt(int64(fmt), 16))
    389 
    390 		// address
    391 		case formAddr:
    392 			val = b.addr()
    393 
    394 		// block
    395 		case formDwarfBlock1:
    396 			val = b.bytes(int(b.uint8()))
    397 		case formDwarfBlock2:
    398 			val = b.bytes(int(b.uint16()))
    399 		case formDwarfBlock4:
    400 			val = b.bytes(int(b.uint32()))
    401 		case formDwarfBlock:
    402 			val = b.bytes(int(b.uint()))
    403 
    404 		// constant
    405 		case formData1:
    406 			val = int64(b.uint8())
    407 		case formData2:
    408 			val = int64(b.uint16())
    409 		case formData4:
    410 			val = int64(b.uint32())
    411 		case formData8:
    412 			val = int64(b.uint64())
    413 		case formSdata:
    414 			val = int64(b.int())
    415 		case formUdata:
    416 			val = int64(b.uint())
    417 
    418 		// flag
    419 		case formFlag:
    420 			val = b.uint8() == 1
    421 		// New in DWARF 4.
    422 		case formFlagPresent:
    423 			// The attribute is implicitly indicated as present, and no value is
    424 			// encoded in the debugging information entry itself.
    425 			val = true
    426 
    427 		// reference to other entry
    428 		case formRefAddr:
    429 			vers := b.format.version()
    430 			if vers == 0 {
    431 				b.error("unknown version for DW_FORM_ref_addr")
    432 			} else if vers == 2 {
    433 				val = Offset(b.addr())
    434 			} else {
    435 				is64, known := b.format.dwarf64()
    436 				if !known {
    437 					b.error("unknown size for DW_FORM_ref_addr")
    438 				} else if is64 {
    439 					val = Offset(b.uint64())
    440 				} else {
    441 					val = Offset(b.uint32())
    442 				}
    443 			}
    444 		case formRef1:
    445 			val = Offset(b.uint8()) + ubase
    446 		case formRef2:
    447 			val = Offset(b.uint16()) + ubase
    448 		case formRef4:
    449 			val = Offset(b.uint32()) + ubase
    450 		case formRef8:
    451 			val = Offset(b.uint64()) + ubase
    452 		case formRefUdata:
    453 			val = Offset(b.uint()) + ubase
    454 
    455 		// string
    456 		case formString:
    457 			val = b.string()
    458 		case formStrp:
    459 			off := b.uint32() // offset into .debug_str
    460 			if b.err != nil {
    461 				return nil
    462 			}
    463 			b1 := makeBuf(b.dwarf, unknownFormat{}, "str", 0, b.dwarf.str)
    464 			b1.skip(int(off))
    465 			val = b1.string()
    466 			if b1.err != nil {
    467 				b.err = b1.err
    468 				return nil
    469 			}
    470 
    471 		// lineptr, loclistptr, macptr, rangelistptr
    472 		// New in DWARF 4, but clang can generate them with -gdwarf-2.
    473 		// Section reference, replacing use of formData4 and formData8.
    474 		case formSecOffset, formGnuRefAlt, formGnuStrpAlt:
    475 			is64, known := b.format.dwarf64()
    476 			if !known {
    477 				b.error("unknown size for form 0x" + strconv.FormatInt(int64(fmt), 16))
    478 			} else if is64 {
    479 				val = int64(b.uint64())
    480 			} else {
    481 				val = int64(b.uint32())
    482 			}
    483 
    484 		// exprloc
    485 		// New in DWARF 4.
    486 		case formExprloc:
    487 			val = b.bytes(int(b.uint()))
    488 
    489 		// reference
    490 		// New in DWARF 4.
    491 		case formRefSig8:
    492 			// 64-bit type signature.
    493 			val = b.uint64()
    494 		}
    495 		e.Field[i].Val = val
    496 	}
    497 	if b.err != nil {
    498 		return nil
    499 	}
    500 	return e
    501 }
    502 
// A Reader allows reading Entry structures from a DWARF ``info'' section.
// The Entry structures are arranged in a tree.  The Reader's Next function
// returns successive entries from a pre-order traversal of the tree.
// If an entry has children, its Children field will be true, and the children
// follow, terminated by an Entry with Tag 0.
type Reader struct {
	b            buf    // decoding buffer over the remaining data of the current unit
	d            *Data  // DWARF data being read
	err          error  // sticky error; reported by Next until cleared by Seek
	unit         int    // index into d.unit of the current unit
	lastChildren bool   // .Children of last entry returned by Next
	lastSibling  Offset // .Val(AttrSibling) of last entry returned by Next
}
    516 
    517 // Reader returns a new Reader for Data.
    518 // The reader is positioned at byte offset 0 in the DWARF ``info'' section.
    519 func (d *Data) Reader() *Reader {
    520 	r := &Reader{d: d}
    521 	r.Seek(0)
    522 	return r
    523 }
    524 
    525 // AddressSize returns the size in bytes of addresses in the current compilation
    526 // unit.
    527 func (r *Reader) AddressSize() int {
    528 	return r.d.unit[r.unit].asize
    529 }
    530 
    531 // Seek positions the Reader at offset off in the encoded entry stream.
    532 // Offset 0 can be used to denote the first entry.
    533 func (r *Reader) Seek(off Offset) {
    534 	d := r.d
    535 	r.err = nil
    536 	r.lastChildren = false
    537 	if off == 0 {
    538 		if len(d.unit) == 0 {
    539 			return
    540 		}
    541 		u := &d.unit[0]
    542 		r.unit = 0
    543 		r.b = makeBuf(r.d, u, "info", u.off, u.data)
    544 		return
    545 	}
    546 
    547 	i := d.offsetToUnit(off)
    548 	if i == -1 {
    549 		r.err = errors.New("offset out of range")
    550 		return
    551 	}
    552 	u := &d.unit[i]
    553 	r.unit = i
    554 	r.b = makeBuf(r.d, u, "info", off, u.data[off-u.off:])
    555 }
    556 
    557 // maybeNextUnit advances to the next unit if this one is finished.
    558 func (r *Reader) maybeNextUnit() {
    559 	for len(r.b.data) == 0 && r.unit+1 < len(r.d.unit) {
    560 		r.unit++
    561 		u := &r.d.unit[r.unit]
    562 		r.b = makeBuf(r.d, u, "info", u.off, u.data)
    563 	}
    564 }
    565 
    566 // Next reads the next entry from the encoded entry stream.
    567 // It returns nil, nil when it reaches the end of the section.
    568 // It returns an error if the current offset is invalid or the data at the
    569 // offset cannot be decoded as a valid Entry.
    570 func (r *Reader) Next() (*Entry, error) {
    571 	if r.err != nil {
    572 		return nil, r.err
    573 	}
    574 	r.maybeNextUnit()
    575 	if len(r.b.data) == 0 {
    576 		return nil, nil
    577 	}
    578 	u := &r.d.unit[r.unit]
    579 	e := r.b.entry(u.atable, u.base)
    580 	if r.b.err != nil {
    581 		r.err = r.b.err
    582 		return nil, r.err
    583 	}
    584 	if e != nil {
    585 		r.lastChildren = e.Children
    586 		if r.lastChildren {
    587 			r.lastSibling, _ = e.Val(AttrSibling).(Offset)
    588 		}
    589 	} else {
    590 		r.lastChildren = false
    591 	}
    592 	return e, nil
    593 }
    594 
    595 // SkipChildren skips over the child entries associated with
    596 // the last Entry returned by Next.  If that Entry did not have
    597 // children or Next has not been called, SkipChildren is a no-op.
    598 func (r *Reader) SkipChildren() {
    599 	if r.err != nil || !r.lastChildren {
    600 		return
    601 	}
    602 
    603 	// If the last entry had a sibling attribute,
    604 	// that attribute gives the offset of the next
    605 	// sibling, so we can avoid decoding the
    606 	// child subtrees.
    607 	if r.lastSibling >= r.b.off {
    608 		r.Seek(r.lastSibling)
    609 		return
    610 	}
    611 
    612 	for {
    613 		e, err := r.Next()
    614 		if err != nil || e == nil || e.Tag == 0 {
    615 			break
    616 		}
    617 		if e.Children {
    618 			r.SkipChildren()
    619 		}
    620 	}
    621 }
    622 
    623 // clone returns a copy of the reader.  This is used by the typeReader
    624 // interface.
    625 func (r *Reader) clone() typeReader {
    626 	return r.d.Reader()
    627 }
    628 
    629 // offset returns the current buffer offset.  This is used by the
    630 // typeReader interface.
    631 func (r *Reader) offset() Offset {
    632 	return r.b.off
    633 }
    634