Home | History | Annotate | Download | only in dwarf
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package dwarf
      6 
      7 import (
      8 	"errors"
      9 	"fmt"
     10 	"io"
     11 	"path"
     12 	"strings"
     13 )
     14 
     15 // A LineReader reads a sequence of LineEntry structures from a DWARF
     16 // "line" section for a single compilation unit. LineEntries occur in
     17 // order of increasing PC and each LineEntry gives metadata for the
     18 // instructions from that LineEntry's PC to just before the next
     19 // LineEntry's PC. The last entry will have its EndSequence field set.
     20 type LineReader struct {
     21 	buf buf
     22 
     23 	// Original .debug_line section data. Used by Seek.
     24 	section []byte
     25 
     26 	// Header information
     27 	version              uint16
     28 	minInstructionLength int
     29 	maxOpsPerInstruction int
     30 	defaultIsStmt        bool
     31 	lineBase             int
     32 	lineRange            int
     33 	opcodeBase           int
     34 	opcodeLengths        []int
     35 	directories          []string
     36 	fileEntries          []*LineFile
     37 
     38 	programOffset Offset // section offset of line number program
     39 	endOffset     Offset // section offset of byte following program
     40 
     41 	initialFileEntries int // initial length of fileEntries
     42 
     43 	// Current line number program state machine registers
     44 	state     LineEntry // public state
     45 	fileIndex int       // private state
     46 }
     47 
     48 // A LineEntry is a row in a DWARF line table.
     49 type LineEntry struct {
     50 	// Address is the program-counter value of a machine
     51 	// instruction generated by the compiler. This LineEntry
     52 	// applies to each instruction from Address to just before the
     53 	// Address of the next LineEntry.
     54 	Address uint64
     55 
     56 	// OpIndex is the index of an operation within a VLIW
     57 	// instruction. The index of the first operation is 0. For
     58 	// non-VLIW architectures, it will always be 0. Address and
     59 	// OpIndex together form an operation pointer that can
     60 	// reference any individual operation within the instruction
     61 	// stream.
     62 	OpIndex int
     63 
     64 	// File is the source file corresponding to these
     65 	// instructions.
     66 	File *LineFile
     67 
     68 	// Line is the source code line number corresponding to these
     69 	// instructions. Lines are numbered beginning at 1. It may be
     70 	// 0 if these instructions cannot be attributed to any source
     71 	// line.
     72 	Line int
     73 
     74 	// Column is the column number within the source line of these
     75 	// instructions. Columns are numbered beginning at 1. It may
     76 	// be 0 to indicate the "left edge" of the line.
     77 	Column int
     78 
     79 	// IsStmt indicates that Address is a recommended breakpoint
     80 	// location, such as the beginning of a line, statement, or a
     81 	// distinct subpart of a statement.
     82 	IsStmt bool
     83 
     84 	// BasicBlock indicates that Address is the beginning of a
     85 	// basic block.
     86 	BasicBlock bool
     87 
     88 	// PrologueEnd indicates that Address is one (of possibly
     89 	// many) PCs where execution should be suspended for a
     90 	// breakpoint on entry to the containing function.
     91 	//
     92 	// Added in DWARF 3.
     93 	PrologueEnd bool
     94 
     95 	// EpilogueBegin indicates that Address is one (of possibly
     96 	// many) PCs where execution should be suspended for a
     97 	// breakpoint on exit from this function.
     98 	//
     99 	// Added in DWARF 3.
    100 	EpilogueBegin bool
    101 
    102 	// ISA is the instruction set architecture for these
    103 	// instructions. Possible ISA values should be defined by the
    104 	// applicable ABI specification.
    105 	//
    106 	// Added in DWARF 3.
    107 	ISA int
    108 
    109 	// Discriminator is an arbitrary integer indicating the block
    110 	// to which these instructions belong. It serves to
    111 	// distinguish among multiple blocks that may all have with
    112 	// the same source file, line, and column. Where only one
    113 	// block exists for a given source position, it should be 0.
    114 	//
    115 	// Added in DWARF 3.
    116 	Discriminator int
    117 
    118 	// EndSequence indicates that Address is the first byte after
    119 	// the end of a sequence of target machine instructions. If it
    120 	// is set, only this and the Address field are meaningful. A
    121 	// line number table may contain information for multiple
    122 	// potentially disjoint instruction sequences. The last entry
    123 	// in a line table should always have EndSequence set.
    124 	EndSequence bool
    125 }
    126 
    127 // A LineFile is a source file referenced by a DWARF line table entry.
    128 type LineFile struct {
    129 	Name   string
    130 	Mtime  uint64 // Implementation defined modification time, or 0 if unknown
    131 	Length int    // File length, or 0 if unknown
    132 }
    133 
    134 // LineReader returns a new reader for the line table of compilation
    135 // unit cu, which must be an Entry with tag TagCompileUnit.
    136 //
    137 // If this compilation unit has no line table, it returns nil, nil.
    138 func (d *Data) LineReader(cu *Entry) (*LineReader, error) {
    139 	if d.line == nil {
    140 		// No line tables available.
    141 		return nil, nil
    142 	}
    143 
    144 	// Get line table information from cu.
    145 	off, ok := cu.Val(AttrStmtList).(int64)
    146 	if !ok {
    147 		// cu has no line table.
    148 		return nil, nil
    149 	}
    150 	if off > int64(len(d.line)) {
    151 		return nil, errors.New("AttrStmtList value out of range")
    152 	}
    153 	// AttrCompDir is optional if all file names are absolute. Use
    154 	// the empty string if it's not present.
    155 	compDir, _ := cu.Val(AttrCompDir).(string)
    156 
    157 	// Create the LineReader.
    158 	u := &d.unit[d.offsetToUnit(cu.Offset)]
    159 	buf := makeBuf(d, u, "line", Offset(off), d.line[off:])
    160 	// The compilation directory is implicitly directories[0].
    161 	r := LineReader{buf: buf, section: d.line, directories: []string{compDir}}
    162 
    163 	// Read the header.
    164 	if err := r.readHeader(); err != nil {
    165 		return nil, err
    166 	}
    167 
    168 	// Initialize line reader state.
    169 	r.Reset()
    170 
    171 	return &r, nil
    172 }
    173 
    174 // readHeader reads the line number program header from r.buf and sets
    175 // all of the header fields in r.
    176 func (r *LineReader) readHeader() error {
    177 	buf := &r.buf
    178 
    179 	// Read basic header fields [DWARF2 6.2.4].
    180 	hdrOffset := buf.off
    181 	unitLength, dwarf64 := buf.unitLength()
    182 	r.endOffset = buf.off + unitLength
    183 	if r.endOffset > buf.off+Offset(len(buf.data)) {
    184 		return DecodeError{"line", hdrOffset, fmt.Sprintf("line table end %d exceeds section size %d", r.endOffset, buf.off+Offset(len(buf.data)))}
    185 	}
    186 	r.version = buf.uint16()
    187 	if buf.err == nil && (r.version < 2 || r.version > 4) {
    188 		// DWARF goes to all this effort to make new opcodes
    189 		// backward-compatible, and then adds fields right in
    190 		// the middle of the header in new versions, so we're
    191 		// picky about only supporting known line table
    192 		// versions.
    193 		return DecodeError{"line", hdrOffset, fmt.Sprintf("unknown line table version %d", r.version)}
    194 	}
    195 	var headerLength Offset
    196 	if dwarf64 {
    197 		headerLength = Offset(buf.uint64())
    198 	} else {
    199 		headerLength = Offset(buf.uint32())
    200 	}
    201 	r.programOffset = buf.off + headerLength
    202 	r.minInstructionLength = int(buf.uint8())
    203 	if r.version >= 4 {
    204 		// [DWARF4 6.2.4]
    205 		r.maxOpsPerInstruction = int(buf.uint8())
    206 	} else {
    207 		r.maxOpsPerInstruction = 1
    208 	}
    209 	r.defaultIsStmt = buf.uint8() != 0
    210 	r.lineBase = int(int8(buf.uint8()))
    211 	r.lineRange = int(buf.uint8())
    212 
    213 	// Validate header.
    214 	if buf.err != nil {
    215 		return buf.err
    216 	}
    217 	if r.maxOpsPerInstruction == 0 {
    218 		return DecodeError{"line", hdrOffset, "invalid maximum operations per instruction: 0"}
    219 	}
    220 	if r.lineRange == 0 {
    221 		return DecodeError{"line", hdrOffset, "invalid line range: 0"}
    222 	}
    223 
    224 	// Read standard opcode length table. This table starts with opcode 1.
    225 	r.opcodeBase = int(buf.uint8())
    226 	r.opcodeLengths = make([]int, r.opcodeBase)
    227 	for i := 1; i < r.opcodeBase; i++ {
    228 		r.opcodeLengths[i] = int(buf.uint8())
    229 	}
    230 
    231 	// Validate opcode lengths.
    232 	if buf.err != nil {
    233 		return buf.err
    234 	}
    235 	for i, length := range r.opcodeLengths {
    236 		if known, ok := knownOpcodeLengths[i]; ok && known != length {
    237 			return DecodeError{"line", hdrOffset, fmt.Sprintf("opcode %d expected to have length %d, but has length %d", i, known, length)}
    238 		}
    239 	}
    240 
    241 	// Read include directories table. The caller already set
    242 	// directories[0] to the compilation directory.
    243 	for {
    244 		directory := buf.string()
    245 		if buf.err != nil {
    246 			return buf.err
    247 		}
    248 		if len(directory) == 0 {
    249 			break
    250 		}
    251 		if !pathIsAbs(directory) {
    252 			// Relative paths are implicitly relative to
    253 			// the compilation directory.
    254 			directory = pathJoin(r.directories[0], directory)
    255 		}
    256 		r.directories = append(r.directories, directory)
    257 	}
    258 
    259 	// Read file name list. File numbering starts with 1, so leave
    260 	// the first entry nil.
    261 	r.fileEntries = make([]*LineFile, 1)
    262 	for {
    263 		if done, err := r.readFileEntry(); err != nil {
    264 			return err
    265 		} else if done {
    266 			break
    267 		}
    268 	}
    269 	r.initialFileEntries = len(r.fileEntries)
    270 
    271 	return buf.err
    272 }
    273 
    274 // readFileEntry reads a file entry from either the header or a
    275 // DW_LNE_define_file extended opcode and adds it to r.fileEntries. A
    276 // true return value indicates that there are no more entries to read.
    277 func (r *LineReader) readFileEntry() (bool, error) {
    278 	name := r.buf.string()
    279 	if r.buf.err != nil {
    280 		return false, r.buf.err
    281 	}
    282 	if len(name) == 0 {
    283 		return true, nil
    284 	}
    285 	off := r.buf.off
    286 	dirIndex := int(r.buf.uint())
    287 	if !pathIsAbs(name) {
    288 		if dirIndex >= len(r.directories) {
    289 			return false, DecodeError{"line", off, "directory index too large"}
    290 		}
    291 		name = pathJoin(r.directories[dirIndex], name)
    292 	}
    293 	mtime := r.buf.uint()
    294 	length := int(r.buf.uint())
    295 
    296 	r.fileEntries = append(r.fileEntries, &LineFile{name, mtime, length})
    297 	return false, nil
    298 }
    299 
    300 // updateFile updates r.state.File after r.fileIndex has
    301 // changed or r.fileEntries has changed.
    302 func (r *LineReader) updateFile() {
    303 	if r.fileIndex < len(r.fileEntries) {
    304 		r.state.File = r.fileEntries[r.fileIndex]
    305 	} else {
    306 		r.state.File = nil
    307 	}
    308 }
    309 
    310 // Next sets *entry to the next row in this line table and moves to
    311 // the next row. If there are no more entries and the line table is
    312 // properly terminated, it returns io.EOF.
    313 //
    314 // Rows are always in order of increasing entry.Address, but
    315 // entry.Line may go forward or backward.
    316 func (r *LineReader) Next(entry *LineEntry) error {
    317 	if r.buf.err != nil {
    318 		return r.buf.err
    319 	}
    320 
    321 	// Execute opcodes until we reach an opcode that emits a line
    322 	// table entry.
    323 	for {
    324 		if len(r.buf.data) == 0 {
    325 			return io.EOF
    326 		}
    327 		emit := r.step(entry)
    328 		if r.buf.err != nil {
    329 			return r.buf.err
    330 		}
    331 		if emit {
    332 			return nil
    333 		}
    334 	}
    335 }
    336 
    337 // knownOpcodeLengths gives the opcode lengths (in varint arguments)
    338 // of known standard opcodes.
    339 var knownOpcodeLengths = map[int]int{
    340 	lnsCopy:             0,
    341 	lnsAdvancePC:        1,
    342 	lnsAdvanceLine:      1,
    343 	lnsSetFile:          1,
    344 	lnsNegateStmt:       0,
    345 	lnsSetBasicBlock:    0,
    346 	lnsConstAddPC:       0,
    347 	lnsSetPrologueEnd:   0,
    348 	lnsSetEpilogueBegin: 0,
    349 	lnsSetISA:           1,
    350 	// lnsFixedAdvancePC takes a uint8 rather than a varint; it's
    351 	// unclear what length the header is supposed to claim, so
    352 	// ignore it.
    353 }
    354 
    355 // step processes the next opcode and updates r.state. If the opcode
    356 // emits a row in the line table, this updates *entry and returns
    357 // true.
    358 func (r *LineReader) step(entry *LineEntry) bool {
    359 	opcode := int(r.buf.uint8())
    360 
    361 	if opcode >= r.opcodeBase {
    362 		// Special opcode [DWARF2 6.2.5.1, DWARF4 6.2.5.1]
    363 		adjustedOpcode := opcode - r.opcodeBase
    364 		r.advancePC(adjustedOpcode / r.lineRange)
    365 		lineDelta := r.lineBase + adjustedOpcode%r.lineRange
    366 		r.state.Line += lineDelta
    367 		goto emit
    368 	}
    369 
    370 	switch opcode {
    371 	case 0:
    372 		// Extended opcode [DWARF2 6.2.5.3]
    373 		length := Offset(r.buf.uint())
    374 		startOff := r.buf.off
    375 		opcode := r.buf.uint8()
    376 
    377 		switch opcode {
    378 		case lneEndSequence:
    379 			r.state.EndSequence = true
    380 			*entry = r.state
    381 			r.resetState()
    382 
    383 		case lneSetAddress:
    384 			r.state.Address = r.buf.addr()
    385 
    386 		case lneDefineFile:
    387 			if done, err := r.readFileEntry(); err != nil {
    388 				r.buf.err = err
    389 				return false
    390 			} else if done {
    391 				r.buf.err = DecodeError{"line", startOff, "malformed DW_LNE_define_file operation"}
    392 				return false
    393 			}
    394 			r.updateFile()
    395 
    396 		case lneSetDiscriminator:
    397 			// [DWARF4 6.2.5.3]
    398 			r.state.Discriminator = int(r.buf.uint())
    399 		}
    400 
    401 		r.buf.skip(int(startOff + length - r.buf.off))
    402 
    403 		if opcode == lneEndSequence {
    404 			return true
    405 		}
    406 
    407 	// Standard opcodes [DWARF2 6.2.5.2]
    408 	case lnsCopy:
    409 		goto emit
    410 
    411 	case lnsAdvancePC:
    412 		r.advancePC(int(r.buf.uint()))
    413 
    414 	case lnsAdvanceLine:
    415 		r.state.Line += int(r.buf.int())
    416 
    417 	case lnsSetFile:
    418 		r.fileIndex = int(r.buf.uint())
    419 		r.updateFile()
    420 
    421 	case lnsSetColumn:
    422 		r.state.Column = int(r.buf.uint())
    423 
    424 	case lnsNegateStmt:
    425 		r.state.IsStmt = !r.state.IsStmt
    426 
    427 	case lnsSetBasicBlock:
    428 		r.state.BasicBlock = true
    429 
    430 	case lnsConstAddPC:
    431 		r.advancePC((255 - r.opcodeBase) / r.lineRange)
    432 
    433 	case lnsFixedAdvancePC:
    434 		r.state.Address += uint64(r.buf.uint16())
    435 
    436 	// DWARF3 standard opcodes [DWARF3 6.2.5.2]
    437 	case lnsSetPrologueEnd:
    438 		r.state.PrologueEnd = true
    439 
    440 	case lnsSetEpilogueBegin:
    441 		r.state.EpilogueBegin = true
    442 
    443 	case lnsSetISA:
    444 		r.state.ISA = int(r.buf.uint())
    445 
    446 	default:
    447 		// Unhandled standard opcode. Skip the number of
    448 		// arguments that the prologue says this opcode has.
    449 		for i := 0; i < r.opcodeLengths[opcode]; i++ {
    450 			r.buf.uint()
    451 		}
    452 	}
    453 	return false
    454 
    455 emit:
    456 	*entry = r.state
    457 	r.state.BasicBlock = false
    458 	r.state.PrologueEnd = false
    459 	r.state.EpilogueBegin = false
    460 	r.state.Discriminator = 0
    461 	return true
    462 }
    463 
    464 // advancePC advances "operation pointer" (the combination of Address
    465 // and OpIndex) in r.state by opAdvance steps.
    466 func (r *LineReader) advancePC(opAdvance int) {
    467 	opIndex := r.state.OpIndex + opAdvance
    468 	r.state.Address += uint64(r.minInstructionLength * (opIndex / r.maxOpsPerInstruction))
    469 	r.state.OpIndex = opIndex % r.maxOpsPerInstruction
    470 }
    471 
    472 // A LineReaderPos represents a position in a line table.
    473 type LineReaderPos struct {
    474 	// off is the current offset in the DWARF line section.
    475 	off Offset
    476 	// numFileEntries is the length of fileEntries.
    477 	numFileEntries int
    478 	// state and fileIndex are the statement machine state at
    479 	// offset off.
    480 	state     LineEntry
    481 	fileIndex int
    482 }
    483 
    484 // Tell returns the current position in the line table.
    485 func (r *LineReader) Tell() LineReaderPos {
    486 	return LineReaderPos{r.buf.off, len(r.fileEntries), r.state, r.fileIndex}
    487 }
    488 
    489 // Seek restores the line table reader to a position returned by Tell.
    490 //
    491 // The argument pos must have been returned by a call to Tell on this
    492 // line table.
    493 func (r *LineReader) Seek(pos LineReaderPos) {
    494 	r.buf.off = pos.off
    495 	r.buf.data = r.section[r.buf.off:r.endOffset]
    496 	r.fileEntries = r.fileEntries[:pos.numFileEntries]
    497 	r.state = pos.state
    498 	r.fileIndex = pos.fileIndex
    499 }
    500 
    501 // Reset repositions the line table reader at the beginning of the
    502 // line table.
    503 func (r *LineReader) Reset() {
    504 	// Reset buffer to the line number program offset.
    505 	r.buf.off = r.programOffset
    506 	r.buf.data = r.section[r.buf.off:r.endOffset]
    507 
    508 	// Reset file entries list.
    509 	r.fileEntries = r.fileEntries[:r.initialFileEntries]
    510 
    511 	// Reset line number program state.
    512 	r.resetState()
    513 }
    514 
    515 // resetState resets r.state to its default values
    516 func (r *LineReader) resetState() {
    517 	// Reset the state machine registers to the defaults given in
    518 	// [DWARF4 6.2.2].
    519 	r.state = LineEntry{
    520 		Address:       0,
    521 		OpIndex:       0,
    522 		File:          nil,
    523 		Line:          1,
    524 		Column:        0,
    525 		IsStmt:        r.defaultIsStmt,
    526 		BasicBlock:    false,
    527 		PrologueEnd:   false,
    528 		EpilogueBegin: false,
    529 		ISA:           0,
    530 		Discriminator: 0,
    531 	}
    532 	r.fileIndex = 1
    533 	r.updateFile()
    534 }
    535 
    536 // ErrUnknownPC is the error returned by LineReader.ScanPC when the
    537 // seek PC is not covered by any entry in the line table.
    538 var ErrUnknownPC = errors.New("ErrUnknownPC")
    539 
    540 // SeekPC sets *entry to the LineEntry that includes pc and positions
    541 // the reader on the next entry in the line table. If necessary, this
    542 // will seek backwards to find pc.
    543 //
    544 // If pc is not covered by any entry in this line table, SeekPC
    545 // returns ErrUnknownPC. In this case, *entry and the final seek
    546 // position are unspecified.
    547 //
    548 // Note that DWARF line tables only permit sequential, forward scans.
    549 // Hence, in the worst case, this takes time linear in the size of the
    550 // line table. If the caller wishes to do repeated fast PC lookups, it
    551 // should build an appropriate index of the line table.
    552 func (r *LineReader) SeekPC(pc uint64, entry *LineEntry) error {
    553 	if err := r.Next(entry); err != nil {
    554 		return err
    555 	}
    556 	if entry.Address > pc {
    557 		// We're too far. Start at the beginning of the table.
    558 		r.Reset()
    559 		if err := r.Next(entry); err != nil {
    560 			return err
    561 		}
    562 		if entry.Address > pc {
    563 			// The whole table starts after pc.
    564 			r.Reset()
    565 			return ErrUnknownPC
    566 		}
    567 	}
    568 
    569 	// Scan until we pass pc, then back up one.
    570 	for {
    571 		var next LineEntry
    572 		pos := r.Tell()
    573 		if err := r.Next(&next); err != nil {
    574 			if err == io.EOF {
    575 				return ErrUnknownPC
    576 			}
    577 			return err
    578 		}
    579 		if next.Address > pc {
    580 			if entry.EndSequence {
    581 				// pc is in a hole in the table.
    582 				return ErrUnknownPC
    583 			}
    584 			// entry is the desired entry. Back up the
    585 			// cursor to "next" and return success.
    586 			r.Seek(pos)
    587 			return nil
    588 		}
    589 		*entry = next
    590 	}
    591 }
    592 
    593 // pathIsAbs returns whether path is an absolute path (or "full path
    594 // name" in DWARF parlance). This is in "whatever form makes sense for
    595 // the host system", so this accepts both UNIX-style and DOS-style
    596 // absolute paths. We avoid the filepath package because we want this
    597 // to behave the same regardless of our host system and because we
    598 // don't know what system the paths came from.
    599 func pathIsAbs(path string) bool {
    600 	_, path = splitDrive(path)
    601 	return len(path) > 0 && (path[0] == '/' || path[0] == '\\')
    602 }
    603 
    604 // pathJoin joins dirname and filename. filename must be relative.
    605 // DWARF paths can be UNIX-style or DOS-style, so this handles both.
    606 func pathJoin(dirname, filename string) string {
    607 	if len(dirname) == 0 {
    608 		return filename
    609 	}
    610 	// dirname should be absolute, which means we can determine
    611 	// whether it's a DOS path reasonably reliably by looking for
    612 	// a drive letter or UNC path.
    613 	drive, dirname := splitDrive(dirname)
    614 	if drive == "" {
    615 		// UNIX-style path.
    616 		return path.Join(dirname, filename)
    617 	}
    618 	// DOS-style path.
    619 	drive2, filename := splitDrive(filename)
    620 	if drive2 != "" {
    621 		if strings.ToLower(drive) != strings.ToLower(drive2) {
    622 			// Different drives. There's not much we can
    623 			// do here, so just ignore the directory.
    624 			return drive2 + filename
    625 		}
    626 		// Drives are the same. Ignore drive on filename.
    627 	}
    628 	if !(strings.HasSuffix(dirname, "/") || strings.HasSuffix(dirname, `\`)) && dirname != "" {
    629 		dirname += `\`
    630 	}
    631 	return drive + dirname + filename
    632 }
    633 
    634 // splitDrive splits the DOS drive letter or UNC share point from
    635 // path, if any. path == drive + rest
    636 func splitDrive(path string) (drive, rest string) {
    637 	if len(path) >= 2 && path[1] == ':' {
    638 		if c := path[0]; 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
    639 			return path[:2], path[2:]
    640 		}
    641 	}
    642 	if len(path) > 3 && (path[0] == '\\' || path[0] == '/') && (path[1] == '\\' || path[1] == '/') {
    643 		// Normalize the path so we can search for just \ below.
    644 		npath := strings.Replace(path, "/", `\`, -1)
    645 		// Get the host part, which must be non-empty.
    646 		slash1 := strings.IndexByte(npath[2:], '\\') + 2
    647 		if slash1 > 2 {
    648 			// Get the mount-point part, which must be non-empty.
    649 			slash2 := strings.IndexByte(npath[slash1+1:], '\\') + slash1 + 1
    650 			if slash2 > slash1 {
    651 				return path[:slash2], path[slash2:]
    652 			}
    653 		}
    654 	}
    655 	return "", path
    656 }
    657