Home | History | Annotate | Download | only in tree
      1 /*
      2  [The "BSD license"]
      3  Copyright (c) 2005-2009 Terence Parr
      4  All rights reserved.
      5 
      6  Redistribution and use in source and binary forms, with or without
      7  modification, are permitted provided that the following conditions
      8  are met:
      9  1. Redistributions of source code must retain the above copyright
     10      notice, this list of conditions and the following disclaimer.
     11  2. Redistributions in binary form must reproduce the above copyright
     12      notice, this list of conditions and the following disclaimer in the
     13      documentation and/or other materials provided with the distribution.
     14  3. The name of the author may not be used to endorse or promote products
     15      derived from this software without specific prior written permission.
     16 
     17  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 package org.antlr.runtime.tree;
     29 
     30 import org.antlr.runtime.Token;
     31 
     32 import java.util.ArrayList;
     33 import java.util.HashMap;
     34 import java.util.List;
     35 import java.util.Map;
     36 
     37 /** Build and navigate trees with this object.  Must know about the names
     38  *  of tokens so you have to pass in a map or array of token names (from which
     39  *  this class can build the map).  I.e., Token DECL means nothing unless the
     40  *  class can translate it to a token type.
     41  *
     42  *  In order to create nodes and navigate, this class needs a TreeAdaptor.
     43  *
     44  *  This class can build a token type -> node index for repeated use or for
     45  *  iterating over the various nodes with a particular type.
     46  *
     47  *  This class works in conjunction with the TreeAdaptor rather than moving
     48  *  all this functionality into the adaptor.  An adaptor helps build and
     49  *  navigate trees using methods.  This class helps you do it with string
     50  *  patterns like "(A B C)".  You can create a tree from that pattern or
     51  *  match subtrees against it.
     52  */
     53 public class TreeWizard {
     54 	protected TreeAdaptor adaptor;
     55 	protected Map tokenNameToTypeMap;
     56 
     57 	public interface ContextVisitor {
     58 		// TODO: should this be called visit or something else?
     59 		public void visit(Object t, Object parent, int childIndex, Map labels);
     60 	}
     61 
     62 	public static abstract class Visitor implements ContextVisitor {
     63 		public void visit(Object t, Object parent, int childIndex, Map labels) {
     64 			visit(t);
     65 		}
     66 		public abstract void visit(Object t);
     67 	}
     68 
     69 	/** When using %label:TOKENNAME in a tree for parse(), we must
     70 	 *  track the label.
     71 	 */
     72 	public static class TreePattern extends CommonTree {
     73 		public String label;
     74 		public boolean hasTextArg;
     75 		public TreePattern(Token payload) {
     76 			super(payload);
     77 		}
     78 		public String toString() {
     79 			if ( label!=null ) {
     80 				return "%"+label+":"+super.toString();
     81 			}
     82 			else {
     83 				return super.toString();
     84 			}
     85 		}
     86 	}
     87 
     88 	public static class WildcardTreePattern extends TreePattern {
     89 		public WildcardTreePattern(Token payload) {
     90 			super(payload);
     91 		}
     92 	}
     93 
     94 	/** This adaptor creates TreePattern objects for use during scan() */
     95 	public static class TreePatternTreeAdaptor extends CommonTreeAdaptor {
     96 		public Object create(Token payload) {
     97 			return new TreePattern(payload);
     98 		}
     99 	}
    100 
    101 	// TODO: build indexes for the wizard
    102 
    103 	/** During fillBuffer(), we can make a reverse index from a set
    104 	 *  of token types of interest to the list of indexes into the
    105 	 *  node stream.  This lets us convert a node pointer to a
    106 	 *  stream index semi-efficiently for a list of interesting
    107 	 *  nodes such as function definition nodes (you'll want to seek
    108 	 *  to their bodies for an interpreter).  Also useful for doing
    109 	 *  dynamic searches; i.e., go find me all PLUS nodes.
    110 	protected Map tokenTypeToStreamIndexesMap;
    111 
    112 	/** If tokenTypesToReverseIndex set to INDEX_ALL then indexing
    113 	 *  occurs for all token types.
    114 	public static final Set INDEX_ALL = new HashSet();
    115 
    116 	/** A set of token types user would like to index for faster lookup.
    117 	 *  If this is INDEX_ALL, then all token types are tracked.  If null,
    118 	 *  then none are indexed.
    119 	protected Set tokenTypesToReverseIndex = null;
    120 	*/
    121 
    122 	public TreeWizard(TreeAdaptor adaptor) {
    123 		this.adaptor = adaptor;
    124 	}
    125 
    126 	public TreeWizard(TreeAdaptor adaptor, Map tokenNameToTypeMap) {
    127 		this.adaptor = adaptor;
    128 		this.tokenNameToTypeMap = tokenNameToTypeMap;
    129 	}
    130 
    131 	public TreeWizard(TreeAdaptor adaptor, String[] tokenNames) {
    132 		this.adaptor = adaptor;
    133 		this.tokenNameToTypeMap = computeTokenTypes(tokenNames);
    134 	}
    135 
    136 	public TreeWizard(String[] tokenNames) {
    137 		this(new CommonTreeAdaptor(), tokenNames);
    138 	}
    139 
    140 	/** Compute a Map<String, Integer> that is an inverted index of
    141 	 *  tokenNames (which maps int token types to names).
    142 	 */
    143 	public Map computeTokenTypes(String[] tokenNames) {
    144 		Map m = new HashMap();
    145 		if ( tokenNames==null ) {
    146 			return m;
    147 		}
    148 		for (int ttype = Token.MIN_TOKEN_TYPE; ttype < tokenNames.length; ttype++) {
    149 			String name = tokenNames[ttype];
    150 			m.put(name, new Integer(ttype));
    151 		}
    152 		return m;
    153 	}
    154 
    155 	/** Using the map of token names to token types, return the type. */
    156 	public int getTokenType(String tokenName) {
    157 	 	if ( tokenNameToTypeMap==null ) {
    158 			 return Token.INVALID_TOKEN_TYPE;
    159 		 }
    160 		Integer ttypeI = (Integer)tokenNameToTypeMap.get(tokenName);
    161 		if ( ttypeI!=null ) {
    162 			return ttypeI.intValue();
    163 		}
    164 		return Token.INVALID_TOKEN_TYPE;
    165 	}
    166 
    167 	/** Walk the entire tree and make a node name to nodes mapping.
    168 	 *  For now, use recursion but later nonrecursive version may be
    169 	 *  more efficient.  Returns Map<Integer, List> where the List is
    170 	 *  of your AST node type.  The Integer is the token type of the node.
    171 	 *
    172 	 *  TODO: save this index so that find and visit are faster
    173 	 */
    174 	public Map index(Object t) {
    175 		Map m = new HashMap();
    176 		_index(t, m);
    177 		return m;
    178 	}
    179 
    180 	/** Do the work for index */
    181 	protected void _index(Object t, Map m) {
    182 		if ( t==null ) {
    183 			return;
    184 		}
    185 		int ttype = adaptor.getType(t);
    186 		List elements = (List)m.get(new Integer(ttype));
    187 		if ( elements==null ) {
    188 			elements = new ArrayList();
    189 			m.put(new Integer(ttype), elements);
    190 		}
    191 		elements.add(t);
    192 		int n = adaptor.getChildCount(t);
    193 		for (int i=0; i<n; i++) {
    194 			Object child = adaptor.getChild(t, i);
    195 			_index(child, m);
    196 		}
    197 	}
    198 
    199 	/** Return a List of tree nodes with token type ttype */
    200 	public List find(Object t, int ttype) {
    201 		final List nodes = new ArrayList();
    202 		visit(t, ttype, new TreeWizard.Visitor() {
    203 			public void visit(Object t) {
    204 				nodes.add(t);
    205 			}
    206 		});
    207 		return nodes;
    208 	}
    209 
    210 	/** Return a List of subtrees matching pattern. */
    211 	public List find(Object t, String pattern) {
    212 		final List subtrees = new ArrayList();
    213 		// Create a TreePattern from the pattern
    214 		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
    215 		TreePatternParser parser =
    216 			new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
    217 		final TreePattern tpattern = (TreePattern)parser.pattern();
    218 		// don't allow invalid patterns
    219 		if ( tpattern==null ||
    220 			 tpattern.isNil() ||
    221 			 tpattern.getClass()==WildcardTreePattern.class )
    222 		{
    223 			return null;
    224 		}
    225 		int rootTokenType = tpattern.getType();
    226 		visit(t, rootTokenType, new TreeWizard.ContextVisitor() {
    227 			public void visit(Object t, Object parent, int childIndex, Map labels) {
    228 				if ( _parse(t, tpattern, null) ) {
    229 					subtrees.add(t);
    230 				}
    231 			}
    232 		});
    233 		return subtrees;
    234 	}
    235 
    236 	public Object findFirst(Object t, int ttype) {
    237 		return null;
    238 	}
    239 
    240 	public Object findFirst(Object t, String pattern) {
    241 		return null;
    242 	}
    243 
    244 	/** Visit every ttype node in t, invoking the visitor.  This is a quicker
    245 	 *  version of the general visit(t, pattern) method.  The labels arg
    246 	 *  of the visitor action method is never set (it's null) since using
    247 	 *  a token type rather than a pattern doesn't let us set a label.
    248 	 */
    249 	public void visit(Object t, int ttype, ContextVisitor visitor) {
    250 		_visit(t, null, 0, ttype, visitor);
    251 	}
    252 
    253 	/** Do the recursive work for visit */
    254 	protected void _visit(Object t, Object parent, int childIndex, int ttype, ContextVisitor visitor) {
    255 		if ( t==null ) {
    256 			return;
    257 		}
    258 		if ( adaptor.getType(t)==ttype ) {
    259 			visitor.visit(t, parent, childIndex, null);
    260 		}
    261 		int n = adaptor.getChildCount(t);
    262 		for (int i=0; i<n; i++) {
    263 			Object child = adaptor.getChild(t, i);
    264 			_visit(child, t, i, ttype, visitor);
    265 		}
    266 	}
    267 
    268 	/** For all subtrees that match the pattern, execute the visit action.
    269 	 *  The implementation uses the root node of the pattern in combination
    270 	 *  with visit(t, ttype, visitor) so nil-rooted patterns are not allowed.
    271 	 *  Patterns with wildcard roots are also not allowed.
    272 	 */
    273 	public void visit(Object t, final String pattern, final ContextVisitor visitor) {
    274 		// Create a TreePattern from the pattern
    275 		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
    276 		TreePatternParser parser =
    277 			new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
    278 		final TreePattern tpattern = (TreePattern)parser.pattern();
    279 		// don't allow invalid patterns
    280 		if ( tpattern==null ||
    281 			 tpattern.isNil() ||
    282 			 tpattern.getClass()==WildcardTreePattern.class )
    283 		{
    284 			return;
    285 		}
    286 		final Map labels = new HashMap(); // reused for each _parse
    287 		int rootTokenType = tpattern.getType();
    288 		visit(t, rootTokenType, new TreeWizard.ContextVisitor() {
    289 			public void visit(Object t, Object parent, int childIndex, Map unusedlabels) {
    290 				// the unusedlabels arg is null as visit on token type doesn't set.
    291 				labels.clear();
    292 				if ( _parse(t, tpattern, labels) ) {
    293 					visitor.visit(t, parent, childIndex, labels);
    294 				}
    295 			}
    296 		});
    297 	}
    298 
    299 	/** Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels
    300 	 *  on the various nodes and '.' (dot) as the node/subtree wildcard,
    301 	 *  return true if the pattern matches and fill the labels Map with
    302 	 *  the labels pointing at the appropriate nodes.  Return false if
    303 	 *  the pattern is malformed or the tree does not match.
    304 	 *
    305 	 *  If a node specifies a text arg in pattern, then that must match
    306 	 *  for that node in t.
    307 	 *
    308 	 *  TODO: what's a better way to indicate bad pattern? Exceptions are a hassle
    309 	 */
    310 	public boolean parse(Object t, String pattern, Map labels) {
    311 		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
    312 		TreePatternParser parser =
    313 			new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
    314 		TreePattern tpattern = (TreePattern)parser.pattern();
    315 		/*
    316 		System.out.println("t="+((Tree)t).toStringTree());
    317 		System.out.println("scant="+tpattern.toStringTree());
    318 		*/
    319 		boolean matched = _parse(t, tpattern, labels);
    320 		return matched;
    321 	}
    322 
    323 	public boolean parse(Object t, String pattern) {
    324 		return parse(t, pattern, null);
    325 	}
    326 
    327 	/** Do the work for parse. Check to see if the t2 pattern fits the
    328 	 *  structure and token types in t1.  Check text if the pattern has
    329 	 *  text arguments on nodes.  Fill labels map with pointers to nodes
    330 	 *  in tree matched against nodes in pattern with labels.
    331 	 */
    332 	protected boolean _parse(Object t1, TreePattern tpattern, Map labels) {
    333 		// make sure both are non-null
    334 		if ( t1==null || tpattern==null ) {
    335 			return false;
    336 		}
    337 		// check roots (wildcard matches anything)
    338 		if ( tpattern.getClass() != WildcardTreePattern.class ) {
    339 			if ( adaptor.getType(t1) != tpattern.getType() ) return false;
    340             // if pattern has text, check node text
    341 			if ( tpattern.hasTextArg && !adaptor.getText(t1).equals(tpattern.getText()) ) {
    342 				return false;
    343 			}
    344 		}
    345 		if ( tpattern.label!=null && labels!=null ) {
    346 			// map label in pattern to node in t1
    347 			labels.put(tpattern.label, t1);
    348 		}
    349 		// check children
    350 		int n1 = adaptor.getChildCount(t1);
    351 		int n2 = tpattern.getChildCount();
    352 		if ( n1 != n2 ) {
    353 			return false;
    354 		}
    355 		for (int i=0; i<n1; i++) {
    356 			Object child1 = adaptor.getChild(t1, i);
    357 			TreePattern child2 = (TreePattern)tpattern.getChild(i);
    358 			if ( !_parse(child1, child2, labels) ) {
    359 				return false;
    360 			}
    361 		}
    362 		return true;
    363 	}
    364 
    365 	/** Create a tree or node from the indicated tree pattern that closely
    366 	 *  follows ANTLR tree grammar tree element syntax:
    367 	 *
    368 	 * 		(root child1 ... child2).
    369 	 *
    370 	 *  You can also just pass in a node: ID
    371 	 *
    372 	 *  Any node can have a text argument: ID[foo]
    373 	 *  (notice there are no quotes around foo--it's clear it's a string).
    374 	 *
    375 	 *  nil is a special name meaning "give me a nil node".  Useful for
    376 	 *  making lists: (nil A B C) is a list of A B C.
    377  	 */
    378 	public Object create(String pattern) {
    379 		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
    380 		TreePatternParser parser = new TreePatternParser(tokenizer, this, adaptor);
    381 		Object t = parser.pattern();
    382 		return t;
    383 	}
    384 
    385 	/** Compare t1 and t2; return true if token types/text, structure match exactly.
    386 	 *  The trees are examined in their entirety so that (A B) does not match
    387 	 *  (A B C) nor (A (B C)).
    388 	 // TODO: allow them to pass in a comparator
    389 	 *  TODO: have a version that is nonstatic so it can use instance adaptor
    390 	 *
    391 	 *  I cannot rely on the tree node's equals() implementation as I make
    392 	 *  no constraints at all on the node types nor interface etc...
    393 	 */
    394 	public static boolean equals(Object t1, Object t2, TreeAdaptor adaptor) {
    395 		return _equals(t1, t2, adaptor);
    396 	}
    397 
    398 	/** Compare type, structure, and text of two trees, assuming adaptor in
    399 	 *  this instance of a TreeWizard.
    400 	 */
    401 	public boolean equals(Object t1, Object t2) {
    402 		return _equals(t1, t2, adaptor);
    403 	}
    404 
    405 	protected static boolean _equals(Object t1, Object t2, TreeAdaptor adaptor) {
    406 		// make sure both are non-null
    407 		if ( t1==null || t2==null ) {
    408 			return false;
    409 		}
    410 		// check roots
    411 		if ( adaptor.getType(t1) != adaptor.getType(t2) ) {
    412 			return false;
    413 		}
    414 		if ( !adaptor.getText(t1).equals(adaptor.getText(t2)) ) {
    415 			return false;
    416 		}
    417 		// check children
    418 		int n1 = adaptor.getChildCount(t1);
    419 		int n2 = adaptor.getChildCount(t2);
    420 		if ( n1 != n2 ) {
    421 			return false;
    422 		}
    423 		for (int i=0; i<n1; i++) {
    424 			Object child1 = adaptor.getChild(t1, i);
    425 			Object child2 = adaptor.getChild(t2, i);
    426 			if ( !_equals(child1, child2, adaptor) ) {
    427 				return false;
    428 			}
    429 		}
    430 		return true;
    431 	}
    432 
    433 	// TODO: next stuff taken from CommonTreeNodeStream
    434 
    435 		/** Given a node, add this to the reverse index tokenTypeToStreamIndexesMap.
    436 	 *  You can override this method to alter how indexing occurs.  The
    437 	 *  default is to create a
    438 	 *
    439 	 *    Map<Integer token type,ArrayList<Integer stream index>>
    440 	 *
    441 	 *  This data structure allows you to find all nodes with type INT in order.
    442 	 *
    443 	 *  If you really need to find a node of type, say, FUNC quickly then perhaps
    444 	 *
    445 	 *    Map<Integertoken type,Map<Object tree node,Integer stream index>>
    446 	 *
    447 	 *  would be better for you.  The interior maps map a tree node to
    448 	 *  the index so you don't have to search linearly for a specific node.
    449 	 *
    450 	 *  If you change this method, you will likely need to change
    451 	 *  getNodeIndex(), which extracts information.
    452 	protected void fillReverseIndex(Object node, int streamIndex) {
    453 		//System.out.println("revIndex "+node+"@"+streamIndex);
    454 		if ( tokenTypesToReverseIndex==null ) {
    455 			return; // no indexing if this is empty (nothing of interest)
    456 		}
    457 		if ( tokenTypeToStreamIndexesMap==null ) {
    458 			tokenTypeToStreamIndexesMap = new HashMap(); // first indexing op
    459 		}
    460 		int tokenType = adaptor.getType(node);
    461 		Integer tokenTypeI = new Integer(tokenType);
    462 		if ( !(tokenTypesToReverseIndex==INDEX_ALL ||
    463 			   tokenTypesToReverseIndex.contains(tokenTypeI)) )
    464 		{
    465 			return; // tokenType not of interest
    466 		}
    467 		Integer streamIndexI = new Integer(streamIndex);
    468 		ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI);
    469 		if ( indexes==null ) {
    470 			indexes = new ArrayList(); // no list yet for this token type
    471 			indexes.add(streamIndexI); // not there yet, add
    472 			tokenTypeToStreamIndexesMap.put(tokenTypeI, indexes);
    473 		}
    474 		else {
    475 			if ( !indexes.contains(streamIndexI) ) {
    476 				indexes.add(streamIndexI); // not there yet, add
    477 			}
    478 		}
    479 	}
    480 
    481 	/** Track the indicated token type in the reverse index.  Call this
    482 	 *  repeatedly for each type or use variant with Set argument to
    483 	 *  set all at once.
    484 	 * @param tokenType
    485 	public void reverseIndex(int tokenType) {
    486 		if ( tokenTypesToReverseIndex==null ) {
    487 			tokenTypesToReverseIndex = new HashSet();
    488 		}
    489 		else if ( tokenTypesToReverseIndex==INDEX_ALL ) {
    490 			return;
    491 		}
    492 		tokenTypesToReverseIndex.add(new Integer(tokenType));
    493 	}
    494 
    495 	/** Track the indicated token types in the reverse index. Set
    496 	 *  to INDEX_ALL to track all token types.
    497 	public void reverseIndex(Set tokenTypes) {
    498 		tokenTypesToReverseIndex = tokenTypes;
    499 	}
    500 
    501 	/** Given a node pointer, return its index into the node stream.
    502 	 *  This is not its Token stream index.  If there is no reverse map
    503 	 *  from node to stream index or the map does not contain entries
    504 	 *  for node's token type, a linear search of entire stream is used.
    505 	 *
    506 	 *  Return -1 if exact node pointer not in stream.
    507 	public int getNodeIndex(Object node) {
    508 		//System.out.println("get "+node);
    509 		if ( tokenTypeToStreamIndexesMap==null ) {
    510 			return getNodeIndexLinearly(node);
    511 		}
    512 		int tokenType = adaptor.getType(node);
    513 		Integer tokenTypeI = new Integer(tokenType);
    514 		ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI);
    515 		if ( indexes==null ) {
    516 			//System.out.println("found linearly; stream index = "+getNodeIndexLinearly(node));
    517 			return getNodeIndexLinearly(node);
    518 		}
    519 		for (int i = 0; i < indexes.size(); i++) {
    520 			Integer streamIndexI = (Integer)indexes.get(i);
    521 			Object n = get(streamIndexI.intValue());
    522 			if ( n==node ) {
    523 				//System.out.println("found in index; stream index = "+streamIndexI);
    524 				return streamIndexI.intValue(); // found it!
    525 			}
    526 		}
    527 		return -1;
    528 	}
    529 
    530 	*/
    531 }
    532