1 /* 2 [The "BSD license"] 3 Copyright (c) 2005-2009 Terence Parr 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 1. Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 2. Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 3. The name of the author may not be used to endorse or promote products 15 derived from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.runtime.tree; 29 30 import org.antlr.runtime.Token; 31 32 import java.util.ArrayList; 33 import java.util.HashMap; 34 import java.util.List; 35 import java.util.Map; 36 37 /** Build and navigate trees with this object. Must know about the names 38 * of tokens so you have to pass in a map or array of token names (from which 39 * this class can build the map). I.e., Token DECL means nothing unless the 40 * class can translate it to a token type. 41 * 42 * In order to create nodes and navigate, this class needs a TreeAdaptor. 43 * 44 * This class can build a token type -> node index for repeated use or for 45 * iterating over the various nodes with a particular type. 46 * 47 * This class works in conjunction with the TreeAdaptor rather than moving 48 * all this functionality into the adaptor. An adaptor helps build and 49 * navigate trees using methods. This class helps you do it with string 50 * patterns like "(A B C)". You can create a tree from that pattern or 51 * match subtrees against it. 52 */ 53 public class TreeWizard { 54 protected TreeAdaptor adaptor; 55 protected Map tokenNameToTypeMap; 56 57 public interface ContextVisitor { 58 // TODO: should this be called visit or something else? 59 public void visit(Object t, Object parent, int childIndex, Map labels); 60 } 61 62 public static abstract class Visitor implements ContextVisitor { 63 public void visit(Object t, Object parent, int childIndex, Map labels) { 64 visit(t); 65 } 66 public abstract void visit(Object t); 67 } 68 69 /** When using %label:TOKENNAME in a tree for parse(), we must 70 * track the label. 71 */ 72 public static class TreePattern extends CommonTree { 73 public String label; 74 public boolean hasTextArg; 75 public TreePattern(Token payload) { 76 super(payload); 77 } 78 public String toString() { 79 if ( label!=null ) { 80 return "%"+label+":"+super.toString(); 81 } 82 else { 83 return super.toString(); 84 } 85 } 86 } 87 88 public static class WildcardTreePattern extends TreePattern { 89 public WildcardTreePattern(Token payload) { 90 super(payload); 91 } 92 } 93 94 /** This adaptor creates TreePattern objects for use during scan() */ 95 public static class TreePatternTreeAdaptor extends CommonTreeAdaptor { 96 public Object create(Token payload) { 97 return new TreePattern(payload); 98 } 99 } 100 101 // TODO: build indexes for the wizard 102 103 /** During fillBuffer(), we can make a reverse index from a set 104 * of token types of interest to the list of indexes into the 105 * node stream. This lets us convert a node pointer to a 106 * stream index semi-efficiently for a list of interesting 107 * nodes such as function definition nodes (you'll want to seek 108 * to their bodies for an interpreter). Also useful for doing 109 * dynamic searches; i.e., go find me all PLUS nodes. 110 protected Map tokenTypeToStreamIndexesMap; 111 112 /** If tokenTypesToReverseIndex set to INDEX_ALL then indexing 113 * occurs for all token types. 114 public static final Set INDEX_ALL = new HashSet(); 115 116 /** A set of token types user would like to index for faster lookup. 117 * If this is INDEX_ALL, then all token types are tracked. If null, 118 * then none are indexed. 119 protected Set tokenTypesToReverseIndex = null; 120 */ 121 122 public TreeWizard(TreeAdaptor adaptor) { 123 this.adaptor = adaptor; 124 } 125 126 public TreeWizard(TreeAdaptor adaptor, Map tokenNameToTypeMap) { 127 this.adaptor = adaptor; 128 this.tokenNameToTypeMap = tokenNameToTypeMap; 129 } 130 131 public TreeWizard(TreeAdaptor adaptor, String[] tokenNames) { 132 this.adaptor = adaptor; 133 this.tokenNameToTypeMap = computeTokenTypes(tokenNames); 134 } 135 136 public TreeWizard(String[] tokenNames) { 137 this(new CommonTreeAdaptor(), tokenNames); 138 } 139 140 /** Compute a Map<String, Integer> that is an inverted index of 141 * tokenNames (which maps int token types to names). 142 */ 143 public Map computeTokenTypes(String[] tokenNames) { 144 Map m = new HashMap(); 145 if ( tokenNames==null ) { 146 return m; 147 } 148 for (int ttype = Token.MIN_TOKEN_TYPE; ttype < tokenNames.length; ttype++) { 149 String name = tokenNames[ttype]; 150 m.put(name, new Integer(ttype)); 151 } 152 return m; 153 } 154 155 /** Using the map of token names to token types, return the type. */ 156 public int getTokenType(String tokenName) { 157 if ( tokenNameToTypeMap==null ) { 158 return Token.INVALID_TOKEN_TYPE; 159 } 160 Integer ttypeI = (Integer)tokenNameToTypeMap.get(tokenName); 161 if ( ttypeI!=null ) { 162 return ttypeI.intValue(); 163 } 164 return Token.INVALID_TOKEN_TYPE; 165 } 166 167 /** Walk the entire tree and make a node name to nodes mapping. 168 * For now, use recursion but later nonrecursive version may be 169 * more efficient. Returns Map<Integer, List> where the List is 170 * of your AST node type. The Integer is the token type of the node. 171 * 172 * TODO: save this index so that find and visit are faster 173 */ 174 public Map index(Object t) { 175 Map m = new HashMap(); 176 _index(t, m); 177 return m; 178 } 179 180 /** Do the work for index */ 181 protected void _index(Object t, Map m) { 182 if ( t==null ) { 183 return; 184 } 185 int ttype = adaptor.getType(t); 186 List elements = (List)m.get(new Integer(ttype)); 187 if ( elements==null ) { 188 elements = new ArrayList(); 189 m.put(new Integer(ttype), elements); 190 } 191 elements.add(t); 192 int n = adaptor.getChildCount(t); 193 for (int i=0; i<n; i++) { 194 Object child = adaptor.getChild(t, i); 195 _index(child, m); 196 } 197 } 198 199 /** Return a List of tree nodes with token type ttype */ 200 public List find(Object t, int ttype) { 201 final List nodes = new ArrayList(); 202 visit(t, ttype, new TreeWizard.Visitor() { 203 public void visit(Object t) { 204 nodes.add(t); 205 } 206 }); 207 return nodes; 208 } 209 210 /** Return a List of subtrees matching pattern. */ 211 public List find(Object t, String pattern) { 212 final List subtrees = new ArrayList(); 213 // Create a TreePattern from the pattern 214 TreePatternLexer tokenizer = new TreePatternLexer(pattern); 215 TreePatternParser parser = 216 new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor()); 217 final TreePattern tpattern = (TreePattern)parser.pattern(); 218 // don't allow invalid patterns 219 if ( tpattern==null || 220 tpattern.isNil() || 221 tpattern.getClass()==WildcardTreePattern.class ) 222 { 223 return null; 224 } 225 int rootTokenType = tpattern.getType(); 226 visit(t, rootTokenType, new TreeWizard.ContextVisitor() { 227 public void visit(Object t, Object parent, int childIndex, Map labels) { 228 if ( _parse(t, tpattern, null) ) { 229 subtrees.add(t); 230 } 231 } 232 }); 233 return subtrees; 234 } 235 236 public Object findFirst(Object t, int ttype) { 237 return null; 238 } 239 240 public Object findFirst(Object t, String pattern) { 241 return null; 242 } 243 244 /** Visit every ttype node in t, invoking the visitor. This is a quicker 245 * version of the general visit(t, pattern) method. The labels arg 246 * of the visitor action method is never set (it's null) since using 247 * a token type rather than a pattern doesn't let us set a label. 248 */ 249 public void visit(Object t, int ttype, ContextVisitor visitor) { 250 _visit(t, null, 0, ttype, visitor); 251 } 252 253 /** Do the recursive work for visit */ 254 protected void _visit(Object t, Object parent, int childIndex, int ttype, ContextVisitor visitor) { 255 if ( t==null ) { 256 return; 257 } 258 if ( adaptor.getType(t)==ttype ) { 259 visitor.visit(t, parent, childIndex, null); 260 } 261 int n = adaptor.getChildCount(t); 262 for (int i=0; i<n; i++) { 263 Object child = adaptor.getChild(t, i); 264 _visit(child, t, i, ttype, visitor); 265 } 266 } 267 268 /** For all subtrees that match the pattern, execute the visit action. 269 * The implementation uses the root node of the pattern in combination 270 * with visit(t, ttype, visitor) so nil-rooted patterns are not allowed. 271 * Patterns with wildcard roots are also not allowed. 272 */ 273 public void visit(Object t, final String pattern, final ContextVisitor visitor) { 274 // Create a TreePattern from the pattern 275 TreePatternLexer tokenizer = new TreePatternLexer(pattern); 276 TreePatternParser parser = 277 new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor()); 278 final TreePattern tpattern = (TreePattern)parser.pattern(); 279 // don't allow invalid patterns 280 if ( tpattern==null || 281 tpattern.isNil() || 282 tpattern.getClass()==WildcardTreePattern.class ) 283 { 284 return; 285 } 286 final Map labels = new HashMap(); // reused for each _parse 287 int rootTokenType = tpattern.getType(); 288 visit(t, rootTokenType, new TreeWizard.ContextVisitor() { 289 public void visit(Object t, Object parent, int childIndex, Map unusedlabels) { 290 // the unusedlabels arg is null as visit on token type doesn't set. 291 labels.clear(); 292 if ( _parse(t, tpattern, labels) ) { 293 visitor.visit(t, parent, childIndex, labels); 294 } 295 } 296 }); 297 } 298 299 /** Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels 300 * on the various nodes and '.' (dot) as the node/subtree wildcard, 301 * return true if the pattern matches and fill the labels Map with 302 * the labels pointing at the appropriate nodes. Return false if 303 * the pattern is malformed or the tree does not match. 304 * 305 * If a node specifies a text arg in pattern, then that must match 306 * for that node in t. 307 * 308 * TODO: what's a better way to indicate bad pattern? Exceptions are a hassle 309 */ 310 public boolean parse(Object t, String pattern, Map labels) { 311 TreePatternLexer tokenizer = new TreePatternLexer(pattern); 312 TreePatternParser parser = 313 new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor()); 314 TreePattern tpattern = (TreePattern)parser.pattern(); 315 /* 316 System.out.println("t="+((Tree)t).toStringTree()); 317 System.out.println("scant="+tpattern.toStringTree()); 318 */ 319 boolean matched = _parse(t, tpattern, labels); 320 return matched; 321 } 322 323 public boolean parse(Object t, String pattern) { 324 return parse(t, pattern, null); 325 } 326 327 /** Do the work for parse. Check to see if the t2 pattern fits the 328 * structure and token types in t1. Check text if the pattern has 329 * text arguments on nodes. Fill labels map with pointers to nodes 330 * in tree matched against nodes in pattern with labels. 331 */ 332 protected boolean _parse(Object t1, TreePattern tpattern, Map labels) { 333 // make sure both are non-null 334 if ( t1==null || tpattern==null ) { 335 return false; 336 } 337 // check roots (wildcard matches anything) 338 if ( tpattern.getClass() != WildcardTreePattern.class ) { 339 if ( adaptor.getType(t1) != tpattern.getType() ) return false; 340 // if pattern has text, check node text 341 if ( tpattern.hasTextArg && !adaptor.getText(t1).equals(tpattern.getText()) ) { 342 return false; 343 } 344 } 345 if ( tpattern.label!=null && labels!=null ) { 346 // map label in pattern to node in t1 347 labels.put(tpattern.label, t1); 348 } 349 // check children 350 int n1 = adaptor.getChildCount(t1); 351 int n2 = tpattern.getChildCount(); 352 if ( n1 != n2 ) { 353 return false; 354 } 355 for (int i=0; i<n1; i++) { 356 Object child1 = adaptor.getChild(t1, i); 357 TreePattern child2 = (TreePattern)tpattern.getChild(i); 358 if ( !_parse(child1, child2, labels) ) { 359 return false; 360 } 361 } 362 return true; 363 } 364 365 /** Create a tree or node from the indicated tree pattern that closely 366 * follows ANTLR tree grammar tree element syntax: 367 * 368 * (root child1 ... child2). 369 * 370 * You can also just pass in a node: ID 371 * 372 * Any node can have a text argument: ID[foo] 373 * (notice there are no quotes around foo--it's clear it's a string). 374 * 375 * nil is a special name meaning "give me a nil node". Useful for 376 * making lists: (nil A B C) is a list of A B C. 377 */ 378 public Object create(String pattern) { 379 TreePatternLexer tokenizer = new TreePatternLexer(pattern); 380 TreePatternParser parser = new TreePatternParser(tokenizer, this, adaptor); 381 Object t = parser.pattern(); 382 return t; 383 } 384 385 /** Compare t1 and t2; return true if token types/text, structure match exactly. 386 * The trees are examined in their entirety so that (A B) does not match 387 * (A B C) nor (A (B C)). 388 // TODO: allow them to pass in a comparator 389 * TODO: have a version that is nonstatic so it can use instance adaptor 390 * 391 * I cannot rely on the tree node's equals() implementation as I make 392 * no constraints at all on the node types nor interface etc... 393 */ 394 public static boolean equals(Object t1, Object t2, TreeAdaptor adaptor) { 395 return _equals(t1, t2, adaptor); 396 } 397 398 /** Compare type, structure, and text of two trees, assuming adaptor in 399 * this instance of a TreeWizard. 400 */ 401 public boolean equals(Object t1, Object t2) { 402 return _equals(t1, t2, adaptor); 403 } 404 405 protected static boolean _equals(Object t1, Object t2, TreeAdaptor adaptor) { 406 // make sure both are non-null 407 if ( t1==null || t2==null ) { 408 return false; 409 } 410 // check roots 411 if ( adaptor.getType(t1) != adaptor.getType(t2) ) { 412 return false; 413 } 414 if ( !adaptor.getText(t1).equals(adaptor.getText(t2)) ) { 415 return false; 416 } 417 // check children 418 int n1 = adaptor.getChildCount(t1); 419 int n2 = adaptor.getChildCount(t2); 420 if ( n1 != n2 ) { 421 return false; 422 } 423 for (int i=0; i<n1; i++) { 424 Object child1 = adaptor.getChild(t1, i); 425 Object child2 = adaptor.getChild(t2, i); 426 if ( !_equals(child1, child2, adaptor) ) { 427 return false; 428 } 429 } 430 return true; 431 } 432 433 // TODO: next stuff taken from CommonTreeNodeStream 434 435 /** Given a node, add this to the reverse index tokenTypeToStreamIndexesMap. 436 * You can override this method to alter how indexing occurs. The 437 * default is to create a 438 * 439 * Map<Integer token type,ArrayList<Integer stream index>> 440 * 441 * This data structure allows you to find all nodes with type INT in order. 442 * 443 * If you really need to find a node of type, say, FUNC quickly then perhaps 444 * 445 * Map<Integertoken type,Map<Object tree node,Integer stream index>> 446 * 447 * would be better for you. The interior maps map a tree node to 448 * the index so you don't have to search linearly for a specific node. 449 * 450 * If you change this method, you will likely need to change 451 * getNodeIndex(), which extracts information. 452 protected void fillReverseIndex(Object node, int streamIndex) { 453 //System.out.println("revIndex "+node+"@"+streamIndex); 454 if ( tokenTypesToReverseIndex==null ) { 455 return; // no indexing if this is empty (nothing of interest) 456 } 457 if ( tokenTypeToStreamIndexesMap==null ) { 458 tokenTypeToStreamIndexesMap = new HashMap(); // first indexing op 459 } 460 int tokenType = adaptor.getType(node); 461 Integer tokenTypeI = new Integer(tokenType); 462 if ( !(tokenTypesToReverseIndex==INDEX_ALL || 463 tokenTypesToReverseIndex.contains(tokenTypeI)) ) 464 { 465 return; // tokenType not of interest 466 } 467 Integer streamIndexI = new Integer(streamIndex); 468 ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI); 469 if ( indexes==null ) { 470 indexes = new ArrayList(); // no list yet for this token type 471 indexes.add(streamIndexI); // not there yet, add 472 tokenTypeToStreamIndexesMap.put(tokenTypeI, indexes); 473 } 474 else { 475 if ( !indexes.contains(streamIndexI) ) { 476 indexes.add(streamIndexI); // not there yet, add 477 } 478 } 479 } 480 481 /** Track the indicated token type in the reverse index. Call this 482 * repeatedly for each type or use variant with Set argument to 483 * set all at once. 484 * @param tokenType 485 public void reverseIndex(int tokenType) { 486 if ( tokenTypesToReverseIndex==null ) { 487 tokenTypesToReverseIndex = new HashSet(); 488 } 489 else if ( tokenTypesToReverseIndex==INDEX_ALL ) { 490 return; 491 } 492 tokenTypesToReverseIndex.add(new Integer(tokenType)); 493 } 494 495 /** Track the indicated token types in the reverse index. Set 496 * to INDEX_ALL to track all token types. 497 public void reverseIndex(Set tokenTypes) { 498 tokenTypesToReverseIndex = tokenTypes; 499 } 500 501 /** Given a node pointer, return its index into the node stream. 502 * This is not its Token stream index. If there is no reverse map 503 * from node to stream index or the map does not contain entries 504 * for node's token type, a linear search of entire stream is used. 505 * 506 * Return -1 if exact node pointer not in stream. 507 public int getNodeIndex(Object node) { 508 //System.out.println("get "+node); 509 if ( tokenTypeToStreamIndexesMap==null ) { 510 return getNodeIndexLinearly(node); 511 } 512 int tokenType = adaptor.getType(node); 513 Integer tokenTypeI = new Integer(tokenType); 514 ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI); 515 if ( indexes==null ) { 516 //System.out.println("found linearly; stream index = "+getNodeIndexLinearly(node)); 517 return getNodeIndexLinearly(node); 518 } 519 for (int i = 0; i < indexes.size(); i++) { 520 Integer streamIndexI = (Integer)indexes.get(i); 521 Object n = get(streamIndexI.intValue()); 522 if ( n==node ) { 523 //System.out.println("found in index; stream index = "+streamIndexI); 524 return streamIndexI.intValue(); // found it! 525 } 526 } 527 return -1; 528 } 529 530 */ 531 } 532