1 #!/usr/bin/ruby 2 # encoding: utf-8 3 4 =begin LICENSE 5 6 [The "BSD licence"] 7 Copyright (c) 2009-2010 Kyle Yetter 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in the 18 documentation and/or other materials provided with the distribution. 19 3. The name of the author may not be used to endorse or promote products 20 derived from this software without specific prior written permission. 21 22 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 33 =end 34 35 require 'antlr3' 36 37 module ANTLR3 38 module AST 39 40 =begin rdoc ANTLR3::AST::Wizard 41 42 AST::Wizard is an extra utility class that allows quick creation of AST objects 43 using definitions writing in ANTLR-style tree definition syntax. It can also 44 define <i>tree patterns</i>, objects that are conceptually similar to regular 45 expressions. Patterns allow a simple method for recursively searching through an 46 AST for a particular node structure. These features make tree wizards useful 47 while testing and debugging AST constructing parsers and tree parsers. This 48 library has been ported to Ruby directly from the ANTLR Python runtime API. 49 50 See 51 http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard 52 for more background on the concept of a tree wizard. 53 54 == Usage 55 56 # setting up and creating a tree wizard 57 token_names = Array.new(4, '') + %w(VAR NUMBER EQ PLUS MINUS MULT DIV) 58 adaptor = ANTLR3::AST::CommonTreeAdaptor.new 59 wizard = ANTLR3::AST::Wizard.new(adaptor, token_names) 60 61 # building trees 62 lone_node = wizard.create "VAR[x]" # => x 63 lone_node.type # => 4 # = VAR 64 lone_node.text # => "x" 65 66 expression_node = wizard.create "(MINUS VAR NUMBER)" 67 # => (MINUS VAR NUMBER) 68 statement_node = wizard.create "(EQ[=] VAR[x] (PLUS[+] NUMBER[1] NUMBER[2]))" 69 # => (= x (+ 1 2)) 70 deep_node = wizard.create(<<-TREE) 71 (MULT[*] NUMBER[1] 72 (MINUS[-] 73 (MULT[*] NUMBER[3] VAR[x]) 74 (DIV[/] VAR[y] NUMBER[3.14]) 75 (MULT[*] NUMBER[4] VAR[z]) 76 ) 77 ) 78 TREE 79 # => (* 1 (- (* 3 x) (/ y 3.14) (* 4 z)) 80 81 bad_tree_syntax = wizard.create "(+ 1 2)" 82 # => nil - invalid node names 83 84 # test whether a tree matches a pattern 85 wizard.match(expression_node, '(MINUS VAR .)') # => true 86 wizard.match(lone_node, 'NUMBER NUMBER') # => false 87 88 # extract nodes matching a pattern 89 wizard.find(statement_node, '(PLUS . .)') 90 # => [(+ 1 2)] 91 wizard.find(deep_node, 4) # where 4 is the value of type VAR 92 # => [x, y, z] 93 94 # iterate through the tree and extract nodes with pattern labels 95 wizard.visit(deep_node, '(MULT %n:NUMBER %v:.)') do |node, parent, local_index, labels| 96 printf "n = %p\n, v = %p\n", labels['n'], labels['v'] 97 end 98 # => prints out: 99 # n = 3, v = x 100 # n = 4, v = z 101 102 == Tree Construction Syntax 103 104 Simple Token Node: TK 105 Token Node With Text: TK[text] 106 Flat Node List: (nil TK1 TK2) 107 General Node: (RT TK1 TK2) 108 Complex Nested Node: (RT (SUB1[sometext] TK1) TK2 (SUB2 TK3 TK4[moretext])) 109 110 === Additional Syntax for Tree Matching Patterns 111 112 Match Any Token Node: . 113 Label a Node: %name:TK 114 115 =end 116 117 class Wizard 118 119 include Constants 120 include Util 121 122 =begin rdoc ANTLR3::AST::Wizard::PatternLexer 123 124 A class that is used internally by AST::Wizard to tokenize tree patterns 125 126 =end 127 128 class PatternLexer 129 include ANTLR3::Constants 130 131 autoload :StringScanner, 'strscan' 132 133 PATTERNS = [ 134 [ :space, /\s+/ ], 135 [ :identifier, /[a-z_]\w*/i ], 136 [ :open, /\(/ ], 137 [ :close, /\)/ ], 138 [ :percent, /%/ ], 139 [ :colon, /:/ ], 140 [ :dot, /\./ ], 141 [ :argument, /\[((?:[^\[\]\\]|\\\[|\\\]|\\.)*?)\]/ ] 142 ] 143 144 attr_reader :text, :error, :pattern 145 def initialize( pattern ) 146 @pattern = pattern.to_s 147 @scanner = StringScanner.new( pattern ) 148 @text = '' 149 @error = false 150 end 151 152 def next_token 153 begin 154 @scanner.eos? and return EOF 155 156 type, = PATTERNS.find do |type, pattern| 157 @scanner.scan( pattern ) 158 end 159 160 case type 161 when nil 162 type, @text, @error = EOF, '', true 163 break 164 when :identifier then @text = @scanner.matched 165 when :argument 166 # remove escapes from \] sequences in the text argument 167 ( @text = @scanner[ 1 ] ).gsub!( /\\(?=[\[\]])/, '' ) 168 end 169 end while type == :space 170 171 return type 172 end 173 174 alias error? error 175 end 176 177 178 =begin rdoc ANTLR3::AST::Wizard::Pattern 179 180 A class that is used internally by AST::Wizard to construct AST tree objects 181 from a tokenized tree pattern 182 183 =end 184 185 class PatternParser 186 def self.parse( pattern, token_scheme, adaptor ) 187 lexer = PatternLexer.new( pattern ) 188 new( lexer, token_scheme, adaptor ).pattern 189 end 190 191 include ANTLR3::Constants 192 193 def initialize( tokenizer, token_scheme, adaptor ) 194 @tokenizer = tokenizer 195 @token_scheme = token_scheme 196 @adaptor = adaptor 197 @token_type = tokenizer.next_token 198 end 199 200 def pattern 201 case @token_type 202 when :open then return parse_tree 203 when :identifier 204 node = parse_node 205 @token_type == EOF and return node 206 return nil 207 end 208 return nil 209 end 210 211 CONTINUE_TYPES = [ :open, :identifier, :percent, :dot ] 212 213 def parse_tree 214 @token_type != :open and return nil 215 @token_type = @tokenizer.next_token 216 root = parse_node or return nil 217 218 loop do 219 case @token_type 220 when :open 221 subtree = parse_tree 222 @adaptor.add_child( root, subtree ) 223 when :identifier, :percent, :dot 224 child = parse_node or return nil 225 @adaptor.add_child( root, child ) 226 else break 227 end 228 end 229 @token_type == :close or return nil 230 @token_type = @tokenizer.next_token 231 return root 232 end 233 234 def parse_node 235 label = nil 236 if @token_type == :percent 237 ( @token_type = @tokenizer.next_token ) == :identifier or return nil 238 label = @tokenizer.text 239 ( @token_type = @tokenizer.next_token ) == :colon or return nil 240 @token_type = @tokenizer.next_token 241 end 242 243 if @token_type == :dot 244 @token_type = @tokenizer.next_token 245 wildcard_payload = CommonToken.create( :type => 0, :text => '.' ) 246 node = WildcardPattern.new( wildcard_payload ) 247 label and node.label = label 248 return node 249 end 250 251 @token_type == :identifier or return nil 252 token_name = @tokenizer.text 253 @token_type = @tokenizer.next_token 254 token_name == 'nil' and return @adaptor.create_flat_list 255 256 text = token_name 257 arg = nil 258 if @token_type == :argument 259 arg = @tokenizer.text 260 text = arg 261 @token_type = @tokenizer.next_token 262 end 263 264 node_type = @token_scheme[ token_name ] || INVALID_TOKEN_TYPE 265 node = @adaptor.create_from_type( node_type, text ) 266 267 if Pattern === node 268 node.label, node.has_text_arg = label, arg 269 end 270 return node 271 end 272 end 273 274 275 =begin rdoc ANTLR3::AST::Wizard::Pattern 276 277 A simple tree class that represents the skeletal structure of tree. It is used 278 to validate tree structures as well as to extract nodes that match the pattern. 279 280 =end 281 282 class Pattern < CommonTree 283 def self.parse( pattern_str, scheme ) 284 PatternParser.parse( 285 pattern_str, scheme, PatternAdaptor.new( scheme.token_class ) 286 ) 287 end 288 289 attr_accessor :label, :has_text_arg 290 alias :has_text_arg? :has_text_arg 291 292 def initialize( payload ) 293 super( payload ) 294 @label = nil 295 @has_text_arg = nil 296 end 297 298 def to_s 299 prefix = @label ? '%' << @label << ':' : '' 300 return( prefix << super ) 301 end 302 end 303 304 =begin rdoc ANTLR3::AST::Wizard::WildcardPattern 305 306 A simple tree node used to represent the operation "match any tree node type" in 307 a tree pattern. They are represented by '.' in tree pattern specifications. 308 309 =end 310 311 class WildcardPattern < Pattern; end 312 313 314 =begin rdoc ANTLR3::AST::Wizard::PatternAdaptor 315 316 A customized TreeAdaptor used by AST::Wizards to build tree patterns. 317 318 =end 319 320 class PatternAdaptor < CommonTreeAdaptor 321 def create_with_payload( payload ) 322 return Pattern.new( payload ) 323 end 324 end 325 326 attr_accessor :token_scheme, :adaptor 327 328 def initialize( options = {} ) 329 @token_scheme = options.fetch( :token_scheme ) do 330 TokenScheme.build( options[ :token_class ], options[ :tokens ] ) 331 end 332 @adaptor = options.fetch( :adaptor ) do 333 CommonTreeAdaptor.new( @token_scheme.token_class ) 334 end 335 end 336 337 def create( pattern ) 338 PatternParser.parse( pattern, @token_scheme, @adaptor ) 339 end 340 341 def index( tree, map = {} ) 342 tree or return( map ) 343 type = @adaptor.type_of( tree ) 344 elements = map[ type ] ||= [] 345 elements << tree 346 @adaptor.each_child( tree ) { | child | index( child, map ) } 347 return( map ) 348 end 349 350 def find( tree, what ) 351 case what 352 when Integer then find_token_type( tree, what ) 353 when String then find_pattern( tree, what ) 354 when Symbol then find_token_type( tree, @token_scheme[ what ] ) 355 else raise ArgumentError, "search subject must be a token type (integer) or a string" 356 end 357 end 358 359 def find_token_type( tree, type ) 360 nodes = [] 361 visit( tree, type ) { | t, | nodes << t } 362 return nodes 363 end 364 365 def find_pattern( tree, pattern ) 366 subtrees = [] 367 visit_pattern( tree, pattern ) { | t, | subtrees << t } 368 return( subtrees ) 369 end 370 371 def visit( tree, what = nil, &block ) 372 block_given? or return enum_for( :visit, tree, what ) 373 Symbol === what and what = @token_scheme[ what ] 374 case what 375 when nil then visit_all( tree, &block ) 376 when Integer then visit_type( tree, nil, what, &block ) 377 when String then visit_pattern( tree, what, &block ) 378 else raise( ArgumentError, tidy( <<-'END', true ) ) 379 | The 'what' filter argument must be a tree 380 | pattern (String) or a token type (Integer) 381 | -- got #{ what.inspect } 382 END 383 end 384 end 385 386 def visit_all( tree, parent = nil, &block ) 387 index = @adaptor.child_index( tree ) 388 yield( tree, parent, index, nil ) 389 @adaptor.each_child( tree ) do | child | 390 visit_all( child, tree, &block ) 391 end 392 end 393 394 def visit_type( tree, parent, type, &block ) 395 tree.nil? and return( nil ) 396 index = @adaptor.child_index( tree ) 397 @adaptor.type_of( tree ) == type and yield( tree, parent, index, nil ) 398 @adaptor.each_child( tree ) do | child | 399 visit_type( child, tree, type, &block ) 400 end 401 end 402 403 def visit_pattern( tree, pattern, &block ) 404 pattern = Pattern.parse( pattern, @token_scheme ) 405 406 if pattern.nil? or pattern.flat_list? or pattern.is_a?( WildcardPattern ) 407 return( nil ) 408 end 409 410 visit( tree, pattern.type ) do | tree, parent, child_index, labels | 411 labels = match!( tree, pattern ) and 412 yield( tree, parent, child_index, labels ) 413 end 414 end 415 416 def match( tree, pattern ) 417 pattern = Pattern.parse( pattern, @token_scheme ) 418 419 return( match!( tree, pattern ) ) 420 end 421 422 def match!( tree, pattern, labels = {} ) 423 tree.nil? || pattern.nil? and return false 424 unless pattern.is_a? WildcardPattern 425 @adaptor.type_of( tree ) == pattern.type or return false 426 pattern.has_text_arg && ( @adaptor.text_of( tree ) != pattern.text ) and 427 return false 428 end 429 labels[ pattern.label ] = tree if labels && pattern.label 430 431 number_of_children = @adaptor.child_count( tree ) 432 return false unless number_of_children == pattern.child_count 433 434 number_of_children.times do |index| 435 actual_child = @adaptor.child_of( tree, index ) 436 pattern_child = pattern.child( index ) 437 438 return( false ) unless match!( actual_child, pattern_child, labels ) 439 end 440 441 return labels 442 end 443 444 def equals( tree_a, tree_b, adaptor = @adaptor ) 445 tree_a && tree_b or return( false ) 446 447 adaptor.type_of( tree_a ) == adaptor.type_of( tree_b ) or return false 448 adaptor.text_of( tree_a ) == adaptor.text_of( tree_b ) or return false 449 450 child_count_a = adaptor.child_count( tree_a ) 451 child_count_b = adaptor.child_count( tree_b ) 452 child_count_a == child_count_b or return false 453 454 child_count_a.times do | i | 455 child_a = adaptor.child_of( tree_a, i ) 456 child_b = adaptor.child_of( tree_b, i ) 457 equals( child_a, child_b, adaptor ) or return false 458 end 459 return true 460 end 461 462 463 DOT_DOT_PATTERN = /.*[^\.]\\.{2}[^\.].*/ 464 DOUBLE_ETC_PATTERN = /.*\.{3}\s+\.{3}.*/ 465 466 def in_context?( tree, context ) 467 case context 468 when DOT_DOT_PATTERN then raise ArgumentError, "invalid syntax: .." 469 when DOUBLE_ETC_PATTERN then raise ArgumentError, "invalid syntax: ... ..." 470 end 471 472 context = context.gsub( /([^\.\s])\.{3}([^\.])/, '\1 ... \2' ) 473 context.strip! 474 nodes = context.split( /\s+/ ) 475 476 while tree = @adaptor.parent( tree ) and node = nodes.pop 477 if node == '...' 478 node = nodes.pop or return( true ) 479 tree = @adaptor.each_ancestor( tree ).find do | t | 480 @adaptor.type_name( t ) == node 481 end or return( false ) 482 end 483 @adaptor.type_name( tree ) == node or return( false ) 484 end 485 486 return( false ) if tree.nil? and not nodes.empty? 487 return true 488 end 489 490 private :match! 491 end 492 end 493 end 494