Home | History | Annotate | Download | only in dwarf
      1 // Copyright 2015 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package dwarf
      6 
      7 import (
      8 	"errors"
      9 	"fmt"
     10 	"io"
     11 	"path"
     12 )
     13 
// A LineReader reads a sequence of LineEntry structures from a DWARF
// "line" section for a single compilation unit. LineEntries occur in
// order of increasing PC and each LineEntry gives metadata for the
// instructions from that LineEntry's PC to just before the next
// LineEntry's PC. The last entry will have its EndSequence field set.
type LineReader struct {
	// buf is the decoding cursor. readHeader consumes the line
	// number program header from it and step consumes opcodes from
	// it; Seek and Reset reposition it.
	buf buf

	// Original .debug_line section data. Used by Seek and Reset to
	// re-slice buf.data.
	section []byte

	// Header information, filled in by readHeader.
	//
	// maxOpsPerInstruction is only present in the header for
	// version >= 4; for older versions readHeader sets it to 1.
	// opcodeLengths has opcodeBase elements and is indexed by
	// standard opcode number; element 0 is unused. directories[0]
	// is the compilation directory. fileEntries[0] is nil because
	// file numbering starts at 1.
	version              uint16
	minInstructionLength int
	maxOpsPerInstruction int
	defaultIsStmt        bool
	lineBase             int
	lineRange            int
	opcodeBase           int
	opcodeLengths        []int
	directories          []string
	fileEntries          []*LineFile

	programOffset Offset // section offset of line number program
	endOffset     Offset // section offset of byte following program

	// initialFileEntries is the length of fileEntries once the
	// header has been read. Reset truncates fileEntries back to
	// this length, discarding entries added by DW_LNE_define_file.
	initialFileEntries int // initial length of fileEntries

	// Current line number program state machine registers
	state     LineEntry // public state
	fileIndex int       // private state
}
     46 
// A LineEntry is a row in a DWARF line table.
type LineEntry struct {
	// Address is the program-counter value of a machine
	// instruction generated by the compiler. This LineEntry
	// applies to each instruction from Address to just before the
	// Address of the next LineEntry.
	Address uint64

	// OpIndex is the index of an operation within a VLIW
	// instruction. The index of the first operation is 0. For
	// non-VLIW architectures, it will always be 0. Address and
	// OpIndex together form an operation pointer that can
	// reference any individual operation within the instruction
	// stream.
	OpIndex int

	// File is the source file corresponding to these
	// instructions. It is nil if the current file index does not
	// refer to a known file entry.
	File *LineFile

	// Line is the source code line number corresponding to these
	// instructions. Lines are numbered beginning at 1. It may be
	// 0 if these instructions cannot be attributed to any source
	// line.
	Line int

	// Column is the column number within the source line of these
	// instructions. Columns are numbered beginning at 1. It may
	// be 0 to indicate the "left edge" of the line.
	Column int

	// IsStmt indicates that Address is a recommended breakpoint
	// location, such as the beginning of a line, statement, or a
	// distinct subpart of a statement.
	IsStmt bool

	// BasicBlock indicates that Address is the beginning of a
	// basic block.
	BasicBlock bool

	// PrologueEnd indicates that Address is one (of possibly
	// many) PCs where execution should be suspended for a
	// breakpoint on entry to the containing function.
	//
	// Added in DWARF 3.
	PrologueEnd bool

	// EpilogueBegin indicates that Address is one (of possibly
	// many) PCs where execution should be suspended for a
	// breakpoint on exit from this function.
	//
	// Added in DWARF 3.
	EpilogueBegin bool

	// ISA is the instruction set architecture for these
	// instructions. Possible ISA values should be defined by the
	// applicable ABI specification.
	//
	// Added in DWARF 3.
	ISA int

	// Discriminator is an arbitrary integer indicating the block
	// to which these instructions belong. It serves to
	// distinguish among multiple blocks that may all be
	// associated with the same source file, line, and column.
	// Where only one block exists for a given source position, it
	// should be 0.
	//
	// Added in DWARF 4.
	Discriminator int

	// EndSequence indicates that Address is the first byte after
	// the end of a sequence of target machine instructions. If it
	// is set, only this and the Address field are meaningful. A
	// line number table may contain information for multiple
	// potentially disjoint instruction sequences. The last entry
	// in a line table should always have EndSequence set.
	EndSequence bool
}
    125 
// A LineFile is a source file referenced by a DWARF line table entry.
//
// Name is the file's path. When the line table records a relative
// name, it has already been joined with the include directory that the
// table associates with the file.
type LineFile struct {
	Name   string
	Mtime  uint64 // Implementation defined modification time, or 0 if unknown
	Length int    // File length, or 0 if unknown
}
    132 
    133 // LineReader returns a new reader for the line table of compilation
    134 // unit cu, which must be an Entry with tag TagCompileUnit.
    135 //
    136 // If this compilation unit has no line table, it returns nil, nil.
    137 func (d *Data) LineReader(cu *Entry) (*LineReader, error) {
    138 	if d.line == nil {
    139 		// No line tables available.
    140 		return nil, nil
    141 	}
    142 
    143 	// Get line table information from cu.
    144 	off, ok := cu.Val(AttrStmtList).(int64)
    145 	if !ok {
    146 		// cu has no line table.
    147 		return nil, nil
    148 	}
    149 	if off > int64(len(d.line)) {
    150 		return nil, errors.New("AttrStmtList value out of range")
    151 	}
    152 	// AttrCompDir is optional if all file names are absolute. Use
    153 	// the empty string if it's not present.
    154 	compDir, _ := cu.Val(AttrCompDir).(string)
    155 
    156 	// Create the LineReader.
    157 	u := &d.unit[d.offsetToUnit(cu.Offset)]
    158 	buf := makeBuf(d, u, "line", Offset(off), d.line[off:])
    159 	// The compilation directory is implicitly directories[0].
    160 	r := LineReader{buf: buf, section: d.line, directories: []string{compDir}}
    161 
    162 	// Read the header.
    163 	if err := r.readHeader(); err != nil {
    164 		return nil, err
    165 	}
    166 
    167 	// Initialize line reader state.
    168 	r.Reset()
    169 
    170 	return &r, nil
    171 }
    172 
    173 // readHeader reads the line number program header from r.buf and sets
    174 // all of the header fields in r.
    175 func (r *LineReader) readHeader() error {
    176 	buf := &r.buf
    177 
    178 	// Read basic header fields [DWARF2 6.2.4].
    179 	hdrOffset := buf.off
    180 	unitLength, dwarf64 := buf.unitLength()
    181 	r.endOffset = buf.off + unitLength
    182 	if r.endOffset > buf.off+Offset(len(buf.data)) {
    183 		return DecodeError{"line", hdrOffset, fmt.Sprintf("line table end %d exceeds section size %d", r.endOffset, buf.off+Offset(len(buf.data)))}
    184 	}
    185 	r.version = buf.uint16()
    186 	if buf.err == nil && (r.version < 2 || r.version > 4) {
    187 		// DWARF goes to all this effort to make new opcodes
    188 		// backward-compatible, and then adds fields right in
    189 		// the middle of the header in new versions, so we're
    190 		// picky about only supporting known line table
    191 		// versions.
    192 		return DecodeError{"line", hdrOffset, fmt.Sprintf("unknown line table version %d", r.version)}
    193 	}
    194 	var headerLength Offset
    195 	if dwarf64 {
    196 		headerLength = Offset(buf.uint64())
    197 	} else {
    198 		headerLength = Offset(buf.uint32())
    199 	}
    200 	r.programOffset = buf.off + headerLength
    201 	r.minInstructionLength = int(buf.uint8())
    202 	if r.version >= 4 {
    203 		// [DWARF4 6.2.4]
    204 		r.maxOpsPerInstruction = int(buf.uint8())
    205 	} else {
    206 		r.maxOpsPerInstruction = 1
    207 	}
    208 	r.defaultIsStmt = buf.uint8() != 0
    209 	r.lineBase = int(int8(buf.uint8()))
    210 	r.lineRange = int(buf.uint8())
    211 
    212 	// Validate header.
    213 	if buf.err != nil {
    214 		return buf.err
    215 	}
    216 	if r.maxOpsPerInstruction == 0 {
    217 		return DecodeError{"line", hdrOffset, "invalid maximum operations per instruction: 0"}
    218 	}
    219 	if r.lineRange == 0 {
    220 		return DecodeError{"line", hdrOffset, "invalid line range: 0"}
    221 	}
    222 
    223 	// Read standard opcode length table. This table starts with opcode 1.
    224 	r.opcodeBase = int(buf.uint8())
    225 	r.opcodeLengths = make([]int, r.opcodeBase)
    226 	for i := 1; i < r.opcodeBase; i++ {
    227 		r.opcodeLengths[i] = int(buf.uint8())
    228 	}
    229 
    230 	// Validate opcode lengths.
    231 	if buf.err != nil {
    232 		return buf.err
    233 	}
    234 	for i, length := range r.opcodeLengths {
    235 		if known, ok := knownOpcodeLengths[i]; ok && known != length {
    236 			return DecodeError{"line", hdrOffset, fmt.Sprintf("opcode %d expected to have length %d, but has length %d", i, known, length)}
    237 		}
    238 	}
    239 
    240 	// Read include directories table. The caller already set
    241 	// directories[0] to the compilation directory.
    242 	for {
    243 		directory := buf.string()
    244 		if buf.err != nil {
    245 			return buf.err
    246 		}
    247 		if len(directory) == 0 {
    248 			break
    249 		}
    250 		if !path.IsAbs(directory) {
    251 			// Relative paths are implicitly relative to
    252 			// the compilation directory.
    253 			directory = path.Join(r.directories[0], directory)
    254 		}
    255 		r.directories = append(r.directories, directory)
    256 	}
    257 
    258 	// Read file name list. File numbering starts with 1, so leave
    259 	// the first entry nil.
    260 	r.fileEntries = make([]*LineFile, 1)
    261 	for {
    262 		if done, err := r.readFileEntry(); err != nil {
    263 			return err
    264 		} else if done {
    265 			break
    266 		}
    267 	}
    268 	r.initialFileEntries = len(r.fileEntries)
    269 
    270 	return buf.err
    271 }
    272 
    273 // readFileEntry reads a file entry from either the header or a
    274 // DW_LNE_define_file extended opcode and adds it to r.fileEntries. A
    275 // true return value indicates that there are no more entries to read.
    276 func (r *LineReader) readFileEntry() (bool, error) {
    277 	name := r.buf.string()
    278 	if r.buf.err != nil {
    279 		return false, r.buf.err
    280 	}
    281 	if len(name) == 0 {
    282 		return true, nil
    283 	}
    284 	off := r.buf.off
    285 	dirIndex := int(r.buf.uint())
    286 	if !path.IsAbs(name) {
    287 		if dirIndex >= len(r.directories) {
    288 			return false, DecodeError{"line", off, "directory index too large"}
    289 		}
    290 		name = path.Join(r.directories[dirIndex], name)
    291 	}
    292 	mtime := r.buf.uint()
    293 	length := int(r.buf.uint())
    294 
    295 	r.fileEntries = append(r.fileEntries, &LineFile{name, mtime, length})
    296 	return false, nil
    297 }
    298 
    299 // updateFile updates r.state.File after r.fileIndex has
    300 // changed or r.fileEntries has changed.
    301 func (r *LineReader) updateFile() {
    302 	if r.fileIndex < len(r.fileEntries) {
    303 		r.state.File = r.fileEntries[r.fileIndex]
    304 	} else {
    305 		r.state.File = nil
    306 	}
    307 }
    308 
    309 // Next sets *entry to the next row in this line table and moves to
    310 // the next row. If there are no more entries and the line table is
    311 // properly terminated, it returns io.EOF.
    312 //
    313 // Rows are always in order of increasing entry.Address, but
    314 // entry.Line may go forward or backward.
    315 func (r *LineReader) Next(entry *LineEntry) error {
    316 	if r.buf.err != nil {
    317 		return r.buf.err
    318 	}
    319 
    320 	// Execute opcodes until we reach an opcode that emits a line
    321 	// table entry.
    322 	for {
    323 		if len(r.buf.data) == 0 {
    324 			return io.EOF
    325 		}
    326 		emit := r.step(entry)
    327 		if r.buf.err != nil {
    328 			return r.buf.err
    329 		}
    330 		if emit {
    331 			return nil
    332 		}
    333 	}
    334 }
    335 
// knownOpcodeLengths gives the opcode lengths (in varint arguments)
// of known standard opcodes. readHeader rejects a line table whose
// header declares a different length for any opcode listed here.
var knownOpcodeLengths = map[int]int{
	lnsCopy:             0,
	lnsAdvancePC:        1,
	lnsAdvanceLine:      1,
	lnsSetFile:          1,
	lnsNegateStmt:       0,
	lnsSetBasicBlock:    0,
	lnsConstAddPC:       0,
	lnsSetPrologueEnd:   0,
	lnsSetEpilogueBegin: 0,
	lnsSetISA:           1,
	// lnsFixedAdvancePC takes a uint16 rather than a varint; it's
	// unclear what length the header is supposed to claim, so
	// ignore it.
}
    353 
    354 // step processes the next opcode and updates r.state. If the opcode
    355 // emits a row in the line table, this updates *entry and returns
    356 // true.
    357 func (r *LineReader) step(entry *LineEntry) bool {
    358 	opcode := int(r.buf.uint8())
    359 
    360 	if opcode >= r.opcodeBase {
    361 		// Special opcode [DWARF2 6.2.5.1, DWARF4 6.2.5.1]
    362 		adjustedOpcode := opcode - r.opcodeBase
    363 		r.advancePC(adjustedOpcode / r.lineRange)
    364 		lineDelta := r.lineBase + int(adjustedOpcode)%r.lineRange
    365 		r.state.Line += lineDelta
    366 		goto emit
    367 	}
    368 
    369 	switch opcode {
    370 	case 0:
    371 		// Extended opcode [DWARF2 6.2.5.3]
    372 		length := Offset(r.buf.uint())
    373 		startOff := r.buf.off
    374 		opcode := r.buf.uint8()
    375 
    376 		switch opcode {
    377 		case lneEndSequence:
    378 			r.state.EndSequence = true
    379 			*entry = r.state
    380 			r.resetState()
    381 
    382 		case lneSetAddress:
    383 			r.state.Address = r.buf.addr()
    384 
    385 		case lneDefineFile:
    386 			if done, err := r.readFileEntry(); err != nil {
    387 				r.buf.err = err
    388 				return false
    389 			} else if done {
    390 				r.buf.err = DecodeError{"line", startOff, "malformed DW_LNE_define_file operation"}
    391 				return false
    392 			}
    393 			r.updateFile()
    394 
    395 		case lneSetDiscriminator:
    396 			// [DWARF4 6.2.5.3]
    397 			r.state.Discriminator = int(r.buf.uint())
    398 		}
    399 
    400 		r.buf.skip(int(startOff + length - r.buf.off))
    401 
    402 		if opcode == lneEndSequence {
    403 			return true
    404 		}
    405 
    406 	// Standard opcodes [DWARF2 6.2.5.2]
    407 	case lnsCopy:
    408 		goto emit
    409 
    410 	case lnsAdvancePC:
    411 		r.advancePC(int(r.buf.uint()))
    412 
    413 	case lnsAdvanceLine:
    414 		r.state.Line += int(r.buf.int())
    415 
    416 	case lnsSetFile:
    417 		r.fileIndex = int(r.buf.uint())
    418 		r.updateFile()
    419 
    420 	case lnsSetColumn:
    421 		r.state.Column = int(r.buf.uint())
    422 
    423 	case lnsNegateStmt:
    424 		r.state.IsStmt = !r.state.IsStmt
    425 
    426 	case lnsSetBasicBlock:
    427 		r.state.BasicBlock = true
    428 
    429 	case lnsConstAddPC:
    430 		r.advancePC((255 - r.opcodeBase) / r.lineRange)
    431 
    432 	case lnsFixedAdvancePC:
    433 		r.state.Address += uint64(r.buf.uint16())
    434 
    435 	// DWARF3 standard opcodes [DWARF3 6.2.5.2]
    436 	case lnsSetPrologueEnd:
    437 		r.state.PrologueEnd = true
    438 
    439 	case lnsSetEpilogueBegin:
    440 		r.state.EpilogueBegin = true
    441 
    442 	case lnsSetISA:
    443 		r.state.ISA = int(r.buf.uint())
    444 
    445 	default:
    446 		// Unhandled standard opcode. Skip the number of
    447 		// arguments that the prologue says this opcode has.
    448 		for i := 0; i < r.opcodeLengths[opcode]; i++ {
    449 			r.buf.uint()
    450 		}
    451 	}
    452 	return false
    453 
    454 emit:
    455 	*entry = r.state
    456 	r.state.BasicBlock = false
    457 	r.state.PrologueEnd = false
    458 	r.state.EpilogueBegin = false
    459 	r.state.Discriminator = 0
    460 	return true
    461 }
    462 
    463 // advancePC advances "operation pointer" (the combination of Address
    464 // and OpIndex) in r.state by opAdvance steps.
    465 func (r *LineReader) advancePC(opAdvance int) {
    466 	opIndex := r.state.OpIndex + opAdvance
    467 	r.state.Address += uint64(r.minInstructionLength * (opIndex / r.maxOpsPerInstruction))
    468 	r.state.OpIndex = opIndex % r.maxOpsPerInstruction
    469 }
    470 
// A LineReaderPos represents a position in a line table. Values are
// produced by LineReader.Tell and consumed by LineReader.Seek.
type LineReaderPos struct {
	// off is the current offset in the DWARF line section.
	off Offset
	// numFileEntries is the length of fileEntries.
	numFileEntries int
	// state and fileIndex are the state machine state at
	// offset off.
	state     LineEntry
	fileIndex int
}
    482 
    483 // Tell returns the current position in the line table.
    484 func (r *LineReader) Tell() LineReaderPos {
    485 	return LineReaderPos{r.buf.off, len(r.fileEntries), r.state, r.fileIndex}
    486 }
    487 
    488 // Seek restores the line table reader to a position returned by Tell.
    489 //
    490 // The argument pos must have been returned by a call to Tell on this
    491 // line table.
    492 func (r *LineReader) Seek(pos LineReaderPos) {
    493 	r.buf.off = pos.off
    494 	r.buf.data = r.section[r.buf.off:r.endOffset]
    495 	r.fileEntries = r.fileEntries[:pos.numFileEntries]
    496 	r.state = pos.state
    497 	r.fileIndex = pos.fileIndex
    498 }
    499 
    500 // Reset repositions the line table reader at the beginning of the
    501 // line table.
    502 func (r *LineReader) Reset() {
    503 	// Reset buffer to the line number program offset.
    504 	r.buf.off = r.programOffset
    505 	r.buf.data = r.section[r.buf.off:r.endOffset]
    506 
    507 	// Reset file entries list.
    508 	r.fileEntries = r.fileEntries[:r.initialFileEntries]
    509 
    510 	// Reset line number program state.
    511 	r.resetState()
    512 }
    513 
    514 // resetState resets r.state to its default values
    515 func (r *LineReader) resetState() {
    516 	// Reset the state machine registers to the defaults given in
    517 	// [DWARF4 6.2.2].
    518 	r.state = LineEntry{
    519 		Address:       0,
    520 		OpIndex:       0,
    521 		File:          nil,
    522 		Line:          1,
    523 		Column:        0,
    524 		IsStmt:        r.defaultIsStmt,
    525 		BasicBlock:    false,
    526 		PrologueEnd:   false,
    527 		EpilogueBegin: false,
    528 		ISA:           0,
    529 		Discriminator: 0,
    530 	}
    531 	r.fileIndex = 1
    532 	r.updateFile()
    533 }
    534 
// ErrUnknownPC is the error returned by LineReader.SeekPC when the
// seek PC is not covered by any entry in the line table.
var ErrUnknownPC = errors.New("ErrUnknownPC")
    538 
    539 // SeekPC sets *entry to the LineEntry that includes pc and positions
    540 // the reader on the next entry in the line table. If necessary, this
    541 // will seek backwards to find pc.
    542 //
    543 // If pc is not covered by any entry in this line table, SeekPC
    544 // returns ErrUnknownPC. In this case, *entry and the final seek
    545 // position are unspecified.
    546 //
    547 // Note that DWARF line tables only permit sequential, forward scans.
    548 // Hence, in the worst case, this takes time linear in the size of the
    549 // line table. If the caller wishes to do repeated fast PC lookups, it
    550 // should build an appropriate index of the line table.
    551 func (r *LineReader) SeekPC(pc uint64, entry *LineEntry) error {
    552 	if err := r.Next(entry); err != nil {
    553 		return err
    554 	}
    555 	if entry.Address > pc {
    556 		// We're too far. Start at the beginning of the table.
    557 		r.Reset()
    558 		if err := r.Next(entry); err != nil {
    559 			return err
    560 		}
    561 		if entry.Address > pc {
    562 			// The whole table starts after pc.
    563 			r.Reset()
    564 			return ErrUnknownPC
    565 		}
    566 	}
    567 
    568 	// Scan until we pass pc, then back up one.
    569 	for {
    570 		var next LineEntry
    571 		pos := r.Tell()
    572 		if err := r.Next(&next); err != nil {
    573 			if err == io.EOF {
    574 				return ErrUnknownPC
    575 			}
    576 			return err
    577 		}
    578 		if next.Address > pc {
    579 			if entry.EndSequence {
    580 				// pc is in a hole in the table.
    581 				return ErrUnknownPC
    582 			}
    583 			// entry is the desired entry. Back up the
    584 			// cursor to "next" and return success.
    585 			r.Seek(pos)
    586 			return nil
    587 		}
    588 		*entry = next
    589 	}
    590 }
    591