Home | History | Annotate | Download | only in ast
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package ast
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	"go/token"
     11 	"sort"
     12 )
     13 
     14 type byPos []*CommentGroup
     15 
     16 func (a byPos) Len() int           { return len(a) }
     17 func (a byPos) Less(i, j int) bool { return a[i].Pos() < a[j].Pos() }
     18 func (a byPos) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
     19 
     20 // sortComments sorts the list of comment groups in source order.
     21 //
     22 func sortComments(list []*CommentGroup) {
     23 	// TODO(gri): Does it make sense to check for sorted-ness
     24 	//            first (because we know that sorted-ness is
     25 	//            very likely)?
     26 	if orderedList := byPos(list); !sort.IsSorted(orderedList) {
     27 		sort.Sort(orderedList)
     28 	}
     29 }
     30 
     31 // A CommentMap maps an AST node to a list of comment groups
     32 // associated with it. See NewCommentMap for a description of
     33 // the association.
     34 //
     35 type CommentMap map[Node][]*CommentGroup
     36 
     37 func (cmap CommentMap) addComment(n Node, c *CommentGroup) {
     38 	list := cmap[n]
     39 	if len(list) == 0 {
     40 		list = []*CommentGroup{c}
     41 	} else {
     42 		list = append(list, c)
     43 	}
     44 	cmap[n] = list
     45 }
     46 
     47 type byInterval []Node
     48 
     49 func (a byInterval) Len() int { return len(a) }
     50 func (a byInterval) Less(i, j int) bool {
     51 	pi, pj := a[i].Pos(), a[j].Pos()
     52 	return pi < pj || pi == pj && a[i].End() > a[j].End()
     53 }
     54 func (a byInterval) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
     55 
     56 // nodeList returns the list of nodes of the AST n in source order.
     57 //
     58 func nodeList(n Node) []Node {
     59 	var list []Node
     60 	Inspect(n, func(n Node) bool {
     61 		// don't collect comments
     62 		switch n.(type) {
     63 		case nil, *CommentGroup, *Comment:
     64 			return false
     65 		}
     66 		list = append(list, n)
     67 		return true
     68 	})
     69 	// Note: The current implementation assumes that Inspect traverses the
     70 	//       AST in depth-first and thus _source_ order. If AST traversal
     71 	//       does not follow source order, the sorting call below will be
     72 	//       required.
     73 	// sort.Sort(byInterval(list))
     74 	return list
     75 }
     76 
     77 // A commentListReader helps iterating through a list of comment groups.
     78 //
     79 type commentListReader struct {
     80 	fset     *token.FileSet
     81 	list     []*CommentGroup
     82 	index    int
     83 	comment  *CommentGroup  // comment group at current index
     84 	pos, end token.Position // source interval of comment group at current index
     85 }
     86 
     87 func (r *commentListReader) eol() bool {
     88 	return r.index >= len(r.list)
     89 }
     90 
     91 func (r *commentListReader) next() {
     92 	if !r.eol() {
     93 		r.comment = r.list[r.index]
     94 		r.pos = r.fset.Position(r.comment.Pos())
     95 		r.end = r.fset.Position(r.comment.End())
     96 		r.index++
     97 	}
     98 }
     99 
    100 // A nodeStack keeps track of nested nodes.
    101 // A node lower on the stack lexically contains the nodes higher on the stack.
    102 //
    103 type nodeStack []Node
    104 
    105 // push pops all nodes that appear lexically before n
    106 // and then pushes n on the stack.
    107 //
    108 func (s *nodeStack) push(n Node) {
    109 	s.pop(n.Pos())
    110 	*s = append((*s), n)
    111 }
    112 
    113 // pop pops all nodes that appear lexically before pos
    114 // (i.e., whose lexical extent has ended before or at pos).
    115 // It returns the last node popped.
    116 //
    117 func (s *nodeStack) pop(pos token.Pos) (top Node) {
    118 	i := len(*s)
    119 	for i > 0 && (*s)[i-1].End() <= pos {
    120 		top = (*s)[i-1]
    121 		i--
    122 	}
    123 	*s = (*s)[0:i]
    124 	return top
    125 }
    126 
    127 // NewCommentMap creates a new comment map by associating comment groups
    128 // of the comments list with the nodes of the AST specified by node.
    129 //
    130 // A comment group g is associated with a node n if:
    131 //
    132 //   - g starts on the same line as n ends
    133 //   - g starts on the line immediately following n, and there is
    134 //     at least one empty line after g and before the next node
    135 //   - g starts before n and is not associated to the node before n
    136 //     via the previous rules
    137 //
    138 // NewCommentMap tries to associate a comment group to the "largest"
    139 // node possible: For instance, if the comment is a line comment
    140 // trailing an assignment, the comment is associated with the entire
    141 // assignment rather than just the last operand in the assignment.
    142 //
    143 func NewCommentMap(fset *token.FileSet, node Node, comments []*CommentGroup) CommentMap {
    144 	if len(comments) == 0 {
    145 		return nil // no comments to map
    146 	}
    147 
    148 	cmap := make(CommentMap)
    149 
    150 	// set up comment reader r
    151 	tmp := make([]*CommentGroup, len(comments))
    152 	copy(tmp, comments) // don't change incoming comments
    153 	sortComments(tmp)
    154 	r := commentListReader{fset: fset, list: tmp} // !r.eol() because len(comments) > 0
    155 	r.next()
    156 
    157 	// create node list in lexical order
    158 	nodes := nodeList(node)
    159 	nodes = append(nodes, nil) // append sentinel
    160 
    161 	// set up iteration variables
    162 	var (
    163 		p     Node           // previous node
    164 		pend  token.Position // end of p
    165 		pg    Node           // previous node group (enclosing nodes of "importance")
    166 		pgend token.Position // end of pg
    167 		stack nodeStack      // stack of node groups
    168 	)
    169 
    170 	for _, q := range nodes {
    171 		var qpos token.Position
    172 		if q != nil {
    173 			qpos = fset.Position(q.Pos()) // current node position
    174 		} else {
    175 			// set fake sentinel position to infinity so that
    176 			// all comments get processed before the sentinel
    177 			const infinity = 1 << 30
    178 			qpos.Offset = infinity
    179 			qpos.Line = infinity
    180 		}
    181 
    182 		// process comments before current node
    183 		for r.end.Offset <= qpos.Offset {
    184 			// determine recent node group
    185 			if top := stack.pop(r.comment.Pos()); top != nil {
    186 				pg = top
    187 				pgend = fset.Position(pg.End())
    188 			}
    189 			// Try to associate a comment first with a node group
    190 			// (i.e., a node of "importance" such as a declaration);
    191 			// if that fails, try to associate it with the most recent
    192 			// node.
    193 			// TODO(gri) try to simplify the logic below
    194 			var assoc Node
    195 			switch {
    196 			case pg != nil &&
    197 				(pgend.Line == r.pos.Line ||
    198 					pgend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line):
    199 				// 1) comment starts on same line as previous node group ends, or
    200 				// 2) comment starts on the line immediately after the
    201 				//    previous node group and there is an empty line before
    202 				//    the current node
    203 				// => associate comment with previous node group
    204 				assoc = pg
    205 			case p != nil &&
    206 				(pend.Line == r.pos.Line ||
    207 					pend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line ||
    208 					q == nil):
    209 				// same rules apply as above for p rather than pg,
    210 				// but also associate with p if we are at the end (q == nil)
    211 				assoc = p
    212 			default:
    213 				// otherwise, associate comment with current node
    214 				if q == nil {
    215 					// we can only reach here if there was no p
    216 					// which would imply that there were no nodes
    217 					panic("internal error: no comments should be associated with sentinel")
    218 				}
    219 				assoc = q
    220 			}
    221 			cmap.addComment(assoc, r.comment)
    222 			if r.eol() {
    223 				return cmap
    224 			}
    225 			r.next()
    226 		}
    227 
    228 		// update previous node
    229 		p = q
    230 		pend = fset.Position(p.End())
    231 
    232 		// update previous node group if we see an "important" node
    233 		switch q.(type) {
    234 		case *File, *Field, Decl, Spec, Stmt:
    235 			stack.push(q)
    236 		}
    237 	}
    238 
    239 	return cmap
    240 }
    241 
    242 // Update replaces an old node in the comment map with the new node
    243 // and returns the new node. Comments that were associated with the
    244 // old node are associated with the new node.
    245 //
    246 func (cmap CommentMap) Update(old, new Node) Node {
    247 	if list := cmap[old]; len(list) > 0 {
    248 		delete(cmap, old)
    249 		cmap[new] = append(cmap[new], list...)
    250 	}
    251 	return new
    252 }
    253 
    254 // Filter returns a new comment map consisting of only those
    255 // entries of cmap for which a corresponding node exists in
    256 // the AST specified by node.
    257 //
    258 func (cmap CommentMap) Filter(node Node) CommentMap {
    259 	umap := make(CommentMap)
    260 	Inspect(node, func(n Node) bool {
    261 		if g := cmap[n]; len(g) > 0 {
    262 			umap[n] = g
    263 		}
    264 		return true
    265 	})
    266 	return umap
    267 }
    268 
    269 // Comments returns the list of comment groups in the comment map.
    270 // The result is sorted in source order.
    271 //
    272 func (cmap CommentMap) Comments() []*CommentGroup {
    273 	list := make([]*CommentGroup, 0, len(cmap))
    274 	for _, e := range cmap {
    275 		list = append(list, e...)
    276 	}
    277 	sortComments(list)
    278 	return list
    279 }
    280 
    281 func summary(list []*CommentGroup) string {
    282 	const maxLen = 40
    283 	var buf bytes.Buffer
    284 
    285 	// collect comments text
    286 loop:
    287 	for _, group := range list {
    288 		// Note: CommentGroup.Text() does too much work for what we
    289 		//       need and would only replace this innermost loop.
    290 		//       Just do it explicitly.
    291 		for _, comment := range group.List {
    292 			if buf.Len() >= maxLen {
    293 				break loop
    294 			}
    295 			buf.WriteString(comment.Text)
    296 		}
    297 	}
    298 
    299 	// truncate if too long
    300 	if buf.Len() > maxLen {
    301 		buf.Truncate(maxLen - 3)
    302 		buf.WriteString("...")
    303 	}
    304 
    305 	// replace any invisibles with blanks
    306 	bytes := buf.Bytes()
    307 	for i, b := range bytes {
    308 		switch b {
    309 		case '\t', '\n', '\r':
    310 			bytes[i] = ' '
    311 		}
    312 	}
    313 
    314 	return string(bytes)
    315 }
    316 
    317 func (cmap CommentMap) String() string {
    318 	var buf bytes.Buffer
    319 	fmt.Fprintln(&buf, "CommentMap {")
    320 	for node, comment := range cmap {
    321 		// print name of identifiers; print node type for other nodes
    322 		var s string
    323 		if ident, ok := node.(*Ident); ok {
    324 			s = ident.Name
    325 		} else {
    326 			s = fmt.Sprintf("%T", node)
    327 		}
    328 		fmt.Fprintf(&buf, "\t%p  %20s:  %s\n", node, s, summary(comment))
    329 	}
    330 	fmt.Fprintln(&buf, "}")
    331 	return buf.String()
    332 }
    333