Home | History | Annotate | Download | only in tree
      1 #!/usr/bin/ruby
      2 # encoding: utf-8
      3 
      4 =begin LICENSE
      5 
      6 [The "BSD licence"]
      7 Copyright (c) 2009-2010 Kyle Yetter
      8 All rights reserved.
      9 
     10 Redistribution and use in source and binary forms, with or without
     11 modification, are permitted provided that the following conditions
     12 are met:
     13 
     14  1. Redistributions of source code must retain the above copyright
     15     notice, this list of conditions and the following disclaimer.
     16  2. Redistributions in binary form must reproduce the above copyright
     17     notice, this list of conditions and the following disclaimer in the
     18     documentation and/or other materials provided with the distribution.
     19  3. The name of the author may not be used to endorse or promote products
     20     derived from this software without specific prior written permission.
     21 
     22 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     23 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     24 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     25 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     27 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     31 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 
     33 =end
     34 
     35 require 'antlr3'
     36 
     37 module ANTLR3
     38 module AST
     39 
     40 =begin rdoc ANTLR3::AST::Wizard
     41 
     42 AST::Wizard is an extra utility class that allows quick creation of AST objects
     43 using definitions writing in ANTLR-style tree definition syntax. It can also
     44 define <i>tree patterns</i>, objects that are conceptually similar to regular
     45 expressions. Patterns allow a simple method for recursively searching through an
     46 AST for a particular node structure. These features make tree wizards useful
     47 while testing and debugging AST constructing parsers and tree parsers. This
     48 library has been ported to Ruby directly from the ANTLR Python runtime API.
     49 
     50 See
     51 http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard
     52 for more background on the concept of a tree wizard.
     53 
     54 == Usage
     55 
     56   # setting up and creating a tree wizard
     57   token_names = Array.new(4, '') + %w(VAR NUMBER EQ PLUS MINUS MULT DIV)
     58   adaptor     = ANTLR3::AST::CommonTreeAdaptor.new
     59   wizard      = ANTLR3::AST::Wizard.new(adaptor, token_names)
     60   
     61   # building trees
     62   lone_node = wizard.create "VAR[x]"   # => x
     63   lone_node.type                       # => 4  # = VAR
     64   lone_node.text                       # => "x"
     65   
     66   expression_node = wizard.create "(MINUS VAR NUMBER)"
     67     # => (MINUS VAR NUMBER)
     68   statement_node = wizard.create "(EQ[=] VAR[x] (PLUS[+] NUMBER[1] NUMBER[2]))" 
     69     # => (= x (+ 1 2))
     70   deep_node = wizard.create(<<-TREE)
     71     (MULT[*] NUMBER[1] 
     72       (MINUS[-] 
     73         (MULT[*] NUMBER[3]    VAR[x])
     74         (DIV[/]  VAR[y] NUMBER[3.14])
     75         (MULT[*] NUMBER[4]    VAR[z])
     76       )
     77     )
     78   TREE
     79     # => (* 1 (- (* 3 x) (/ y 3.14) (* 4 z))
     80   
     81   bad_tree_syntax = wizard.create "(+ 1 2)"
     82     # => nil - invalid node names
     83   
     84   # test whether a tree matches a pattern
     85   wizard.match(expression_node, '(MINUS VAR .)') # => true
     86   wizard.match(lone_node, 'NUMBER NUMBER')       # => false
     87   
     88   # extract nodes matching a pattern
     89   wizard.find(statement_node, '(PLUS . .)')
     90   # => [(+ 1 2)]
     91   wizard.find(deep_node, 4)  # where 4 is the value of type VAR
     92   # => [x, y, z]
     93   
     94   # iterate through the tree and extract nodes with pattern labels
     95   wizard.visit(deep_node, '(MULT %n:NUMBER %v:.)') do |node, parent, local_index, labels|
     96     printf "n = %p\n, v = %p\n", labels['n'], labels['v']
     97   end
     98     # => prints out:
     99     # n = 3, v = x
    100     # n = 4, v = z
    101   
    102 == Tree Construction Syntax
    103   
    104   Simple Token Node:     TK
    105   Token Node With Text:  TK[text]
    106   Flat Node List:        (nil TK1 TK2)
    107   General Node:          (RT TK1 TK2)
    108   Complex Nested Node:   (RT (SUB1[sometext] TK1) TK2 (SUB2 TK3 TK4[moretext]))
    109 
    110 === Additional Syntax for Tree Matching Patterns
    111 
    112   Match Any Token Node:  .
    113   Label a Node:          %name:TK
    114 
    115 =end
    116 
    117 class Wizard
    118   
    119   include Constants
    120   include Util
    121 
    122 =begin rdoc ANTLR3::AST::Wizard::PatternLexer
    123 
    124 A class that is used internally by AST::Wizard to tokenize tree patterns
    125 
    126 =end
    127 
    128   class PatternLexer
    129     include ANTLR3::Constants
    130     
    131     autoload :StringScanner, 'strscan'
    132     
    133     PATTERNS = [ 
    134       [ :space, /\s+/ ],
    135       [ :identifier, /[a-z_]\w*/i ],
    136       [ :open, /\(/ ],
    137       [ :close, /\)/ ],
    138       [ :percent, /%/ ],
    139       [ :colon, /:/ ],
    140       [ :dot, /\./ ],
    141       [ :argument, /\[((?:[^\[\]\\]|\\\[|\\\]|\\.)*?)\]/ ]
    142     ]
    143     
    144     attr_reader :text, :error, :pattern
    145     def initialize( pattern )
    146       @pattern = pattern.to_s
    147       @scanner = StringScanner.new( pattern )
    148       @text = ''
    149       @error = false
    150     end
    151     
    152     def next_token
    153       begin
    154         @scanner.eos? and return EOF
    155         
    156         type, = PATTERNS.find do |type, pattern|
    157           @scanner.scan( pattern )
    158         end
    159         
    160         case type
    161         when nil
    162           type, @text, @error = EOF, '', true
    163           break
    164         when :identifier then @text = @scanner.matched
    165         when :argument
    166           # remove escapes from \] sequences in the text argument
    167           ( @text = @scanner[ 1 ] ).gsub!( /\\(?=[\[\]])/, '' )
    168         end
    169       end while type == :space
    170       
    171       return type
    172     end
    173     
    174     alias error? error
    175   end
    176   
    177 
    178 =begin rdoc ANTLR3::AST::Wizard::Pattern
    179 
    180 A class that is used internally by AST::Wizard to construct AST tree objects
    181 from a tokenized tree pattern
    182 
    183 =end
    184 
    185   class PatternParser
    186     def self.parse( pattern, token_scheme, adaptor )
    187       lexer = PatternLexer.new( pattern )
    188       new( lexer, token_scheme, adaptor ).pattern
    189     end
    190     
    191     include ANTLR3::Constants
    192     
    193     def initialize( tokenizer, token_scheme, adaptor )
    194       @tokenizer = tokenizer
    195       @token_scheme = token_scheme
    196       @adaptor   = adaptor
    197       @token_type = tokenizer.next_token
    198     end
    199     
    200     def pattern
    201       case @token_type
    202       when :open then return parse_tree
    203       when :identifier
    204         node = parse_node
    205         @token_type == EOF and return node
    206         return nil
    207       end
    208       return nil
    209     end
    210     
    211     CONTINUE_TYPES = [ :open, :identifier, :percent, :dot ]
    212     
    213     def parse_tree
    214       @token_type != :open and return nil
    215       @token_type = @tokenizer.next_token
    216       root = parse_node or return nil
    217       
    218       loop do
    219         case @token_type
    220         when :open
    221           subtree = parse_tree
    222           @adaptor.add_child( root, subtree )
    223         when :identifier, :percent, :dot
    224           child = parse_node or return nil
    225           @adaptor.add_child( root, child )
    226         else break
    227         end
    228       end
    229       @token_type == :close or return nil
    230       @token_type = @tokenizer.next_token
    231       return root
    232     end
    233     
    234     def parse_node
    235       label = nil
    236       if @token_type == :percent
    237         ( @token_type = @tokenizer.next_token ) == :identifier or return nil
    238         label = @tokenizer.text
    239         ( @token_type = @tokenizer.next_token ) == :colon or return nil
    240         @token_type = @tokenizer.next_token
    241       end
    242       
    243       if @token_type == :dot
    244         @token_type = @tokenizer.next_token
    245         wildcard_payload = CommonToken.create( :type => 0, :text => '.' )
    246         node = WildcardPattern.new( wildcard_payload )
    247         label and node.label = label
    248         return node
    249       end
    250       
    251       @token_type == :identifier or return nil
    252       token_name = @tokenizer.text
    253       @token_type = @tokenizer.next_token
    254       token_name == 'nil' and return @adaptor.create_flat_list
    255       
    256       text = token_name
    257       arg = nil
    258       if @token_type == :argument
    259         arg = @tokenizer.text
    260         text = arg
    261         @token_type = @tokenizer.next_token
    262       end
    263       
    264       node_type = @token_scheme[ token_name ] || INVALID_TOKEN_TYPE
    265       node = @adaptor.create_from_type( node_type, text )
    266       
    267       if Pattern === node
    268         node.label, node.has_text_arg = label, arg
    269       end
    270       return node
    271     end
    272   end
    273   
    274 
    275 =begin rdoc ANTLR3::AST::Wizard::Pattern
    276 
    277 A simple tree class that represents the skeletal structure of tree. It is used
    278 to validate tree structures as well as to extract nodes that match the pattern.
    279 
    280 =end
    281 
    282   class Pattern < CommonTree
    283     def self.parse( pattern_str, scheme )
    284       PatternParser.parse( 
    285         pattern_str, scheme, PatternAdaptor.new( scheme.token_class )
    286       )
    287     end
    288     
    289     attr_accessor :label, :has_text_arg
    290     alias :has_text_arg? :has_text_arg
    291     
    292     def initialize( payload )
    293       super( payload )
    294       @label = nil
    295       @has_text_arg = nil
    296     end
    297     
    298     def to_s
    299       prefix = @label ? '%' << @label << ':' : ''
    300       return( prefix << super )
    301     end
    302   end
    303   
    304 =begin rdoc ANTLR3::AST::Wizard::WildcardPattern
    305 
    306 A simple tree node used to represent the operation "match any tree node type" in
    307 a tree pattern. They are represented by '.' in tree pattern specifications.
    308 
    309 =end
    310   
    311   class WildcardPattern < Pattern; end
    312   
    313 
    314 =begin rdoc ANTLR3::AST::Wizard::PatternAdaptor
    315 
    316 A customized TreeAdaptor used by AST::Wizards to build tree patterns.
    317 
    318 =end
    319 
    320   class PatternAdaptor < CommonTreeAdaptor
    321     def create_with_payload( payload )
    322       return Pattern.new( payload )
    323     end
    324   end
    325 
    326   attr_accessor :token_scheme, :adaptor
    327 
    328   def initialize( options = {} )
    329     @token_scheme = options.fetch( :token_scheme ) do
    330       TokenScheme.build( options[ :token_class ], options[ :tokens ] )
    331     end
    332     @adaptor = options.fetch( :adaptor ) do
    333       CommonTreeAdaptor.new( @token_scheme.token_class )
    334     end
    335   end
    336   
    337   def create( pattern )
    338     PatternParser.parse( pattern, @token_scheme, @adaptor )
    339   end
    340   
    341   def index( tree, map = {} )
    342     tree or return( map )
    343     type = @adaptor.type_of( tree )
    344     elements = map[ type ] ||= []
    345     elements << tree
    346     @adaptor.each_child( tree ) { | child | index( child, map ) }
    347     return( map )
    348   end
    349   
    350   def find( tree, what )
    351     case what
    352     when Integer then find_token_type( tree, what )
    353     when String  then find_pattern( tree, what )
    354     when Symbol  then find_token_type( tree, @token_scheme[ what ] )
    355     else raise ArgumentError, "search subject must be a token type (integer) or a string"
    356     end
    357   end
    358   
    359   def find_token_type( tree, type )
    360     nodes = []
    361     visit( tree, type ) { | t, | nodes << t }
    362     return nodes
    363   end
    364   
    365   def find_pattern( tree, pattern )
    366     subtrees = []
    367     visit_pattern( tree, pattern ) { | t, | subtrees << t }
    368     return( subtrees )
    369   end
    370   
    371   def visit( tree, what = nil, &block )
    372     block_given? or return enum_for( :visit, tree, what )
    373     Symbol === what and what = @token_scheme[ what ]
    374     case what
    375     when nil then visit_all( tree, &block )
    376     when Integer then visit_type( tree, nil, what, &block )
    377     when String  then visit_pattern( tree, what, &block )
    378     else raise( ArgumentError, tidy( <<-'END', true ) )
    379       | The 'what' filter argument must be a tree
    380       | pattern (String) or a token type (Integer)
    381       | -- got #{ what.inspect }
    382       END
    383     end
    384   end
    385   
    386   def visit_all( tree, parent = nil, &block )
    387     index = @adaptor.child_index( tree )
    388     yield( tree, parent, index, nil )
    389     @adaptor.each_child( tree ) do | child |
    390       visit_all( child, tree, &block )
    391     end
    392   end
    393   
    394   def visit_type( tree, parent, type, &block )
    395     tree.nil? and return( nil )
    396     index = @adaptor.child_index( tree )
    397     @adaptor.type_of( tree ) == type and yield( tree, parent, index, nil )
    398     @adaptor.each_child( tree ) do | child |
    399       visit_type( child, tree, type, &block )
    400     end
    401   end
    402   
    403   def visit_pattern( tree, pattern, &block )
    404     pattern = Pattern.parse( pattern, @token_scheme )
    405     
    406     if pattern.nil? or pattern.flat_list? or pattern.is_a?( WildcardPattern )
    407       return( nil )
    408     end
    409     
    410     visit( tree, pattern.type ) do | tree, parent, child_index, labels |
    411       labels = match!( tree, pattern ) and
    412         yield( tree, parent, child_index, labels )
    413     end
    414   end
    415   
    416   def match( tree, pattern )
    417     pattern = Pattern.parse( pattern, @token_scheme )
    418     
    419     return( match!( tree, pattern ) )
    420   end
    421   
    422   def match!( tree, pattern, labels = {} )
    423     tree.nil? || pattern.nil? and return false
    424     unless pattern.is_a? WildcardPattern
    425       @adaptor.type_of( tree ) == pattern.type or return false
    426       pattern.has_text_arg && ( @adaptor.text_of( tree ) != pattern.text ) and
    427         return false
    428     end
    429     labels[ pattern.label ] = tree if labels && pattern.label
    430     
    431     number_of_children = @adaptor.child_count( tree )
    432     return false unless number_of_children == pattern.child_count
    433     
    434     number_of_children.times do |index|
    435       actual_child = @adaptor.child_of( tree, index )
    436       pattern_child = pattern.child( index )
    437       
    438       return( false ) unless match!( actual_child, pattern_child, labels )
    439     end
    440     
    441     return labels
    442   end
    443   
    444   def equals( tree_a, tree_b, adaptor = @adaptor )
    445     tree_a && tree_b or return( false )
    446     
    447     adaptor.type_of( tree_a ) == adaptor.type_of( tree_b ) or return false
    448     adaptor.text_of( tree_a ) == adaptor.text_of( tree_b ) or return false
    449     
    450     child_count_a = adaptor.child_count( tree_a )
    451     child_count_b = adaptor.child_count( tree_b )
    452     child_count_a == child_count_b or return false
    453     
    454     child_count_a.times do | i |
    455       child_a = adaptor.child_of( tree_a, i )
    456       child_b = adaptor.child_of( tree_b, i )
    457       equals( child_a, child_b, adaptor ) or return false
    458     end
    459     return true
    460   end
    461   
    462   
    463   DOT_DOT_PATTERN = /.*[^\.]\\.{2}[^\.].*/
    464   DOUBLE_ETC_PATTERN = /.*\.{3}\s+\.{3}.*/
    465   
    466   def in_context?( tree, context )
    467     case context
    468     when DOT_DOT_PATTERN then raise ArgumentError, "invalid syntax: .."
    469     when DOUBLE_ETC_PATTERN then raise ArgumentError, "invalid syntax: ... ..."
    470     end
    471     
    472     context = context.gsub( /([^\.\s])\.{3}([^\.])/, '\1 ... \2' )
    473     context.strip!
    474     nodes = context.split( /\s+/ )
    475     
    476     while tree = @adaptor.parent( tree ) and node = nodes.pop
    477       if node == '...'
    478         node = nodes.pop or return( true )
    479         tree = @adaptor.each_ancestor( tree ).find do | t |
    480           @adaptor.type_name( t ) == node
    481         end or return( false )
    482       end
    483       @adaptor.type_name( tree ) == node or return( false )
    484     end
    485     
    486     return( false ) if tree.nil? and not nodes.empty?
    487     return true
    488   end
    489   
    490   private :match!
    491 end
    492 end
    493 end
    494