Home | History | Annotate | Download | only in antlr3
      1 #!/usr/bin/ruby
      2 # encoding: utf-8
      3 
      4 require 'antlr3'
      5 
      6 =begin LICENSE
      7 
      8 [The "BSD licence"]
      9 Copyright (c) 2009-2010 Kyle Yetter
     10 All rights reserved.
     11 
     12 Redistribution and use in source and binary forms, with or without
     13 modification, are permitted provided that the following conditions
     14 are met:
     15 
     16  1. Redistributions of source code must retain the above copyright
     17     notice, this list of conditions and the following disclaimer.
     18  2. Redistributions in binary form must reproduce the above copyright
     19     notice, this list of conditions and the following disclaimer in the
     20     documentation and/or other materials provided with the distribution.
     21  3. The name of the author may not be used to endorse or promote products
     22     derived from this software without specific prior written permission.
     23 
     24 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     25 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     26 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     27 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     29 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     30 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     31 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     33 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     34 
     35 =end
     36 
     37 module ANTLR3
     38   
     39 =begin rdoc ANTLR3::Debug
     40 
     41 Namespace for all debugging-related class and module definitions.
     42 
     43 =end
     44 
     45 module Debug
     46 
# NOTE(review): presumably the default TCP port for the socket-based remote
# debugging classes autoloaded below -- confirm against antlr3/debug/socket
DEFAULT_PORT = 49100

# Since there are many components to the debug-mode
# section of the antlr3 runtime library, most of which
# are not used simultaneously, debug.rb contains only the
# base of the debug library; the various listeners
# and the tree-related code are autoloaded on demand.
autoload :EventSocketProxy, 'antlr3/debug/socket'
autoload :RemoteEventSocketListener, 'antlr3/debug/socket'
autoload :TraceEventListener, 'antlr3/debug/trace-event-listener'
autoload :RecordEventListener, 'antlr3/debug/record-event-listener'
autoload :RuleTracer, 'antlr3/debug/rule-tracer'
autoload :EventHub, 'antlr3/debug/event-hub'
autoload :TreeAdaptor, 'antlr3/tree/debug'
autoload :TreeNodeStream, 'antlr3/tree/debug'
     62 
     63 RecognizerSharedState = Struct.new( 
     64   # the rule invocation depth
     65   :rule_invocation_stack,
     66   # a boolean flag to indicate whether or not the current decision is cyclic
     67   :cyclic_decision,
     68   # a stack that tracks follow sets for error recovery
     69   :following,
     70   # a flag indicating whether or not the recognizer is in error recovery mode
     71   :error_recovery,
     72   # the index in the input stream of the last error
     73   :last_error_index,
     74   # tracks the backtracking depth
     75   :backtracking,
     76   # if a grammar is compiled with the memoization option, this will
     77   # be set to a hash mapping previously parsed rules to cached indices
     78   :rule_memory,
     79   # tracks the number of syntax errors seen so far
     80   :syntax_errors,
     81   # holds newly constructed tokens for lexer rules
     82   :token,
     83   # the input stream index at which the token starts
     84   :token_start_position,
     85   # the input stream line number at which the token starts
     86   :token_start_line,
     87   # the input stream column at which the token starts
     88   :token_start_column,
     89   # the channel value of the target token
     90   :channel,
     91   # the type value of the target token
     92   :type,
     93   # the text of the target token
     94   :text
     95 )
     96 
     97 =begin rdoc ANTLR3::Debug::RecognizerSharedState
     98 
     99 ANTLR3::Debug::RecognizerSharedState is identical to
    100 ANTLR3::RecognizerSharedState, but adds additional fields used for recognizers
    101 generated in debug or profiling mode.
    102 
    103 =end
    104 class RecognizerSharedState
    105   def initialize
    106     super( [], false, [], false, -1, 0, nil, 0, nil, -1 )
    107     # ^-- same as this --v 
    108     # self.following = []
    109     # self.error_recovery = false
    110     # self.last_error_index = -1
    111     # self.backtracking = 0
    112     # self.syntax_errors = 0
    113     # self.rule_level = 0
    114     # self.token_start_position = -1
    115   end
    116   
    117   def reset!
    118     self.following.clear
    119     self.error_recovery = false
    120     self.last_error_index = -1
    121     self.backtracking = 0
    122     self.rule_memory and rule_memory.clear
    123     self.syntax_errors = 0
    124     self.token = nil
    125     self.token_start_position = -1
    126     self.token_start_line = nil
    127     self.token_start_column = nil
    128     self.channel = nil
    129     self.type = nil
    130     self.text = nil
    131     self.rule_invocation_stack.clear
    132   end
    133   
    134 end
    135 
    136 =begin rdoc ANTLR3::Debug::ParserEvents
    137 
    138 ParserEvents adds debugging event hook methods and functionality that is
    139 required by the code ANTLR generated when called with the <tt>-debug</tt>
    140 switch.
    141 
    142 =end
    143 module ParserEvents
    144   include ANTLR3::Error
    145   
    146   def self.included( klass )
    147     super
    148     if klass.is_a?( ::Class )
    149       def klass.debug?
    150         true
    151       end
    152     end
    153   end
    154   
    155   
    156   attr_reader :debug_listener
    157   
    158   def initialize( stream, options = {} )
    159     @debug_listener = options[ :debug_listener ] ||= begin
    160       EventSocketProxy.new( self, options ).handshake
    161     end
    162     options[ :state ] ||= Debug::RecognizerSharedState.new
    163     super( stream, options )
    164     if @input.is_a?( Debug::TokenStream )
    165       @input.debug_listener ||= @debug_listener
    166     else
    167       @input = Debug::TokenStream.wrap( @input, @debug_listener )
    168     end
    169   end
    170   
    171   def rule_level
    172     @state.rule_invocation_stack.length
    173   end
    174   
    175   def cyclic_decision?
    176     @state.cyclic_decision
    177   end
    178   
    179   def cyclic_decision=( flag )
    180     @state.cyclic_decision = flag
    181   end
    182   
    183   # custom attribute writer for debug_listener
    184   # propegates the change in listener to the
    185   # parser's debugging input stream
    186   def debug_listener=( dbg )
    187     @debug_listener = dbg
    188     @input.debug_listener = dbg rescue nil
    189   end
    190   
    191   def begin_resync
    192     @debug_listener.begin_resync
    193     super
    194   end
    195   
    196   def end_resync
    197     @debug_listener.end_resync
    198     super
    199   end
    200   
    201   # TO-DO: is this pointless?
    202   def resync
    203     begin_resync
    204     yield( self )
    205   ensure
    206     end_resync
    207   end
    208   
    209   def begin_backtrack
    210     @debug_listener.begin_backtrack( @state.backtracking )
    211   end
    212   
    213   def end_backtrack( successful )
    214     @debug_listener.end_backtrack( @state.backtracking, successful )
    215   end
    216   
    217   def backtrack
    218     @state.backtracking += 1
    219     @debug_listener.begin_backtrack( @state.backtracking )
    220     start = @input.mark
    221     success =
    222       begin yield
    223       rescue BacktrackingFailed then false
    224       else true
    225       end
    226     return success
    227   ensure
    228     @input.rewind( start )
    229     @debug_listener.end_backtrack( @state.backtracking, ( success rescue nil ) )
    230     @state.backtracking -= 1
    231   end
    232   
    233   def report_error( exc )
    234     ANTLR3::RecognitionError === exc and
    235       @debug_listener.recognition_exception( exc )
    236     super
    237   end
    238   
    239   def missing_symbol( error, expected_type, follow )
    240     symbol = super
    241     @debug_listener.consume_node( symbol )
    242     return( symbol )
    243   end
    244   
    245   def in_rule( grammar_file, rule_name )
    246     @state.rule_invocation_stack.empty? and @debug_listener.commence
    247     @debug_listener.enter_rule( grammar_file, rule_name )
    248     @state.rule_invocation_stack.push( grammar_file, rule_name )
    249     yield
    250   ensure
    251     @state.rule_invocation_stack.pop( 2 )
    252     @debug_listener.exit_rule( grammar_file, rule_name )
    253     @state.rule_invocation_stack.empty? and @debug_listener.terminate
    254   end
    255   
    256   def rule_invocation_stack
    257     @state.rule_invocation_stack.each_slice( 2 ).to_a
    258   end
    259   
    260   def predicate?( description )
    261     result = yield
    262     @debug_listener.semantic_predicate( result, description )
    263     return result
    264   end
    265   
    266   def in_alternative( alt_number )
    267     @debug_listener.enter_alternative( alt_number )
    268   end
    269   
    270   def in_subrule( decision_number )
    271     @debug_listener.enter_subrule( decision_number )
    272     yield
    273   ensure
    274     @debug_listener.exit_subrule( decision_number )
    275   end
    276   
    277   def in_decision( decision_number )
    278     @debug_listener.enter_decision( decision_number )
    279     yield
    280   ensure
    281     @debug_listener.exit_decision( decision_number )
    282   end
    283 end
    284 
    285 
    286 =begin rdoc ANTLR3::Debug::TokenStream
    287 
    288 A module that wraps token stream methods with debugging event code. A debuggable
    289 parser will <tt>extend</tt> its input stream with this module if the stream is
    290 not already a Debug::TokenStream.
    291 
    292 =end
    293 module TokenStream
    294   
    295   def self.wrap( stream, debug_listener = nil )
    296     stream.extend( self )
    297     stream.instance_eval do
    298       @initial_stream_state = true
    299       @debug_listener = debug_listener
    300       @last_marker = nil
    301     end
    302     return( stream )
    303   end
    304   attr_reader :last_marker
    305   attr_accessor :debug_listener
    306   
    307   def consume
    308     @initial_stream_state and consume_initial_hidden_tokens
    309     a = index + 1 # the next position IF there are no hidden tokens in between
    310     t = super
    311     b = index     # the actual position after consuming
    312     @debug_listener.consume_token( t ) if @debug_listener
    313     
    314     # if b > a, report the consumption of hidden tokens
    315     for i in a...b
    316       @debug_listener.consume_hidden_token at( i )
    317     end
    318   end
    319   
    320   
    321   # after a token stream fills up its buffer
    322   # by exhausting its token source, it may
    323   # skip to an initial position beyond the first
    324   # actual token, if there are hidden tokens
    325   # at the beginning of the stream.
    326   #
    327   # This private method is used to
    328   # figure out if any hidden tokens
    329   # were skipped initially, and then
    330   # report their consumption to
    331   # the debug listener
    332   def consume_initial_hidden_tokens
    333     first_on_channel_token_index = self.index
    334     first_on_channel_token_index.times do |index|
    335       @debug_listener.consume_hidden_token at( index )
    336     end
    337     @initial_stream_state = false
    338   end
    339   
    340   private :consume_initial_hidden_tokens
    341   
    342   ############################################################################################
    343   ###################################### Stream Methods ######################################
    344   ############################################################################################
    345   
    346   def look( steps = 1 )
    347     @initial_stream_state and consume_initial_hidden_tokens
    348     token = super( steps )
    349     @debug_listener.look( steps, token )
    350     return token
    351   end
    352   
    353   def peek( steps = 1 )
    354     look( steps ).type
    355   end
    356   
    357   def mark
    358     @last_marker = super
    359     @debug_listener.mark( @last_marker )
    360     return @last_marker
    361   end
    362   
    363   def rewind( marker = nil, release = true )
    364     @debug_listener.rewind( marker )
    365     super
    366   end
    367 end
    368 
=begin rdoc ANTLR3::Debug::EventListener

The base interface for debug event listeners. Every event hook is defined here
with an empty ("do nothing") body, so a listener can mix in this module and
override only the events it is interested in.

=end
    376 module EventListener
    377   PROTOCOL_VERSION = '2'
    378   # The parser has just entered a rule. No decision has been made about
    379   # which alt is predicted.  This is fired AFTER init actions have been
    380   # executed.  Attributes are defined and available etc...
    381   # The grammarFileName allows composite grammars to jump around among
    382   # multiple grammar files.
    383   
    384   def enter_rule( grammar_file, rule_name )
    385     # do nothing
    386   end
    387   
    388   # Because rules can have lots of alternatives, it is very useful to
    389   # know which alt you are entering.  This is 1..n for n alts.
    390   
    391   def enter_alternative( alt )
    392     # do nothing
    393   end
    394   
    395   # This is the last thing executed before leaving a rule.  It is
    396   # executed even if an exception is thrown.  This is triggered after
    397   # error reporting and recovery have occurred (unless the exception is
    398   # not caught in this rule).  This implies an "exitAlt" event.
    399   # The grammarFileName allows composite grammars to jump around among
    400   # multiple grammar files.
    401   
    402   def exit_rule( grammar_file, rule_name )
    403     # do nothing
    404   end
    405 
    406   # Track entry into any (...) subrule other EBNF construct
    407   
    408   def enter_subrule( decision_number )
    409     # do nothing
    410   end
    411 
    412   def exit_subrule( decision_number )
    413     # do nothing
    414   end
    415   
    416   # Every decision, fixed k or arbitrary, has an enter/exit event
    417   # so that a GUI can easily track what look/consume events are
    418   # associated with prediction.  You will see a single enter/exit
    419   # subrule but multiple enter/exit decision events, one for each
    420   # loop iteration.
    421   
    422   def enter_decision( decision_number )
    423     # do nothing
    424   end
    425 
    426   def exit_decision( decision_number )
    427     # do nothing
    428   end
    429 
    430   # An input token was consumed; matched by any kind of element.
    431   # Trigger after the token was matched by things like match(), matchAny().
    432   
    433   def consume_token( tree )
    434     # do nothing
    435   end
    436 
    437   # An off-channel input token was consumed.
    438   # Trigger after the token was matched by things like match(), matchAny().
    439   # (unless of course the hidden token is first stuff in the input stream).
    440   
    441   def consume_hidden_token( tree )
    442     # do nothing
    443   end
    444 
    445   # Somebody (anybody) looked ahead.  Note that this actually gets
    446   # triggered by both peek and look calls.  The debugger will want to know
    447   # which Token object was examined.  Like consumeToken, this indicates
    448   # what token was seen at that depth.  A remote debugger cannot look
    449   # ahead into a file it doesn't have so look events must pass the token
    450   # even if the info is redundant.
    451   
    452   def look( i, tree )
    453     # do nothing
    454   end
    455 
    456   # The parser is going to look arbitrarily ahead; mark this location,
    457   # the token stream's marker is sent in case you need it.
    458   
    459   def mark( marker )
    460     # do nothing
    461   end
    462 
    463   # After an arbitrairly long look as with a cyclic DFA (or with
    464   # any backtrack), this informs the debugger that stream should be
    465   # rewound to the position associated with marker.
    466   
    467   def rewind( marker = nil )
    468     # do nothing
    469   end
    470 
    471   def begin_backtrack( level )
    472     # do nothing
    473   end
    474 
    475   def end_backtrack( level, successful )
    476     # do nothing
    477   end
    478   
    479   def backtrack( level )
    480     begin_backtrack( level )
    481     successful = yield( self )
    482     end_backtrack( level, successful )
    483   end
    484 
    485   # To watch a parser move through the grammar, the parser needs to
    486   # inform the debugger what line/charPos it is passing in the grammar.
    487   # For now, this does not know how to switch from one grammar to the
    488   # other and back for island grammars etc...
    489   # This should also allow breakpoints because the debugger can stop
    490   # the parser whenever it hits this line/pos.
    491   
    492   def location( line, position )
    493     # do nothing
    494   end
    495 
    496   # A recognition exception occurred such as NoViableAltError.  I made
    497   # this a generic event so that I can alter the exception hierachy later
    498   # without having to alter all the debug objects.
    499   # Upon error, the stack of enter rule/subrule must be properly unwound.
    500   # If no viable alt occurs it is within an enter/exit decision, which
    501   # also must be rewound.  Even the rewind for each mark must be unwount.
    502   # In the Java target this is pretty easy using try/finally, if a bit
    503   # ugly in the generated code.  The rewind is generated in DFA.predict()
    504   # actually so no code needs to be generated for that.  For languages
    505   # w/o this "finally" feature (C++?), the target implementor will have
    506   # to build an event stack or something.
    507   # Across a socket for remote debugging, only the RecognitionError
    508   # data fields are transmitted.  The token object or whatever that
    509   # caused the problem was the last object referenced by look.  The
    510   # immediately preceding look event should hold the unexpected Token or
    511   # char.
    512   # Here is a sample event trace for grammar:
    513   # b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
    514   # | D
    515   # ;
    516   # The sequence for this rule (with no viable alt in the subrule) for
    517   # input 'c c' (there are 3 tokens) is:
    518   # commence
    519   # look
    520   # enterRule b
    521   # location 7 1
    522   # enter decision 3
    523   # look
    524   # exit decision 3
    525   # enterAlt1
    526   # location 7 5
    527   # look
    528   # consumeToken [c/<4>,1:0]
    529   # location 7 7
    530   # enterSubRule 2
    531   # enter decision 2
    532   # look
    533   # look
    534   # recognitionError NoViableAltError 2 1 2
    535   # exit decision 2
    536   # exitSubRule 2
    537   # beginResync
    538   # look
    539   # consumeToken [c/<4>,1:1]
    540   # look
    541   # endResync
    542   # look(-1)
    543   # exitRule b
    544   # terminate
    545   
    546   def recognition_exception( exception )
    547     # do nothing
    548   end
    549 
    550   # Indicates the recognizer is about to consume tokens to resynchronize
    551   # the parser.  Any consume events from here until the recovered event
    552   # are not part of the parse--they are dead tokens.
    553   
    554   def begin_resync()
    555     # do nothing
    556   end
    557 
    558   # Indicates that the recognizer has finished consuming tokens in order
    559   # to resychronize.  There may be multiple beginResync/endResync pairs
    560   # before the recognizer comes out of errorRecovery mode (in which
    561   # multiple errors are suppressed).  This will be useful
    562   # in a gui where you want to probably grey out tokens that are consumed
    563   # but not matched to anything in grammar.  Anything between
    564   # a beginResync/endResync pair was tossed out by the parser.
    565   
    566   def end_resync()
    567     # do nothing
    568   end
    569   
    570   def resync
    571     begin_resync
    572     yield( self )
    573     end_resync
    574   end
    575 
    576   # A semantic predicate was evaluate with this result and action text
    577   
    578   def semantic_predicate( result, predicate )
    579     # do nothing
    580   end
    581   
    582   # Announce that parsing has begun.  Not technically useful except for
    583   # sending events over a socket.  A GUI for example will launch a thread
    584   # to connect and communicate with a remote parser.  The thread will want
    585   # to notify the GUI when a connection is made.  ANTLR parsers
    586   # trigger this upon entry to the first rule (the ruleLevel is used to
    587   # figure this out).
    588   
    589   def commence(  )
    590     # do nothing
    591   end
    592 
    593   # Parsing is over; successfully or not.  Mostly useful for telling
    594   # remote debugging listeners that it's time to quit.  When the rule
    595   # invocation level goes to zero at the end of a rule, we are done
    596   # parsing.
    597   
    598   def terminate(  )
    599     # do nothing
    600   end
    601 
    602   # Input for a tree parser is an AST, but we know nothing for sure
    603   # about a node except its type and text (obtained from the adaptor).
    604   # This is the analog of the consumeToken method.  Again, the ID is
    605   # the hashCode usually of the node so it only works if hashCode is
    606   # not implemented.  If the type is UP or DOWN, then
    607   # the ID is not really meaningful as it's fixed--there is
    608   # just one UP node and one DOWN navigation node.
    609   
    610   def consume_node( tree )
    611     # do nothing
    612   end
    613   
    614   # A nil was created (even nil nodes have a unique ID...
    615   # they are not "null" per se).  As of 4/28/2006, this
    616   # seems to be uniquely triggered when starting a new subtree
    617   # such as when entering a subrule in automatic mode and when
    618   # building a tree in rewrite mode.
    619   # If you are receiving this event over a socket via
    620   # RemoteDebugEventSocketListener then only tree.ID is set.
    621   
    622   def flat_node( tree )
    623     # do nothing
    624   end
    625 
    626   # Upon syntax error, recognizers bracket the error with an error node
    627   # if they are building ASTs.
    628   
    629   def error_node( tree )
    630     # do nothing
    631   end
    632 
    633   # Announce a new node built from token elements such as type etc...
    634   # If you are receiving this event over a socket via
    635   # RemoteDebugEventSocketListener then only tree.ID, type, text are
    636   # set.
    637   
    638   def create_node( node, token = nil )
    639     # do nothing
    640   end
    641 
    642   # Make a node the new root of an existing root.
    643   # Note: the newRootID parameter is possibly different
    644   # than the TreeAdaptor.becomeRoot() newRoot parameter.
    645   # In our case, it will always be the result of calling
    646   # TreeAdaptor.becomeRoot() and not root_n or whatever.
    647   # The listener should assume that this event occurs
    648   # only when the current subrule (or rule) subtree is
    649   # being reset to newRootID.
    650   # If you are receiving this event over a socket via
    651   # RemoteDebugEventSocketListener then only IDs are set.
    652   # @see antlr3.tree.TreeAdaptor.becomeRoot()
    653   
    654   def become_root( new_root, old_root )
    655     # do nothing
    656   end
    657 
    658   # Make childID a child of rootID.
    659   # If you are receiving this event over a socket via
    660   # RemoteDebugEventSocketListener then only IDs are set.
    661   # @see antlr3.tree.TreeAdaptor.addChild()
    662   
    663   def add_child( root, child )
    664     # do nothing
    665   end
    666 
    667   # Set the token start/stop token index for a subtree root or node.
    668   # If you are receiving this event over a socket via
    669   # RemoteDebugEventSocketListener then only tree.ID is set.
    670   
    671   def set_token_boundaries( tree, token_start_index, token_stop_index )
    672     # do nothing
    673   end
    674   
    675   def examine_rule_memoization( rule )
    676     # do nothing
    677   end
    678   
    679   def on( event_name, &block )
    680     sclass = class << self; self; end
    681     sclass.send( :define_method, event_name, &block )
    682   end
    683   
    684   EVENTS = [ 
    685     :add_child, :backtrack, :become_root, :begin_backtrack,
    686     :begin_resync, :commence, :consume_hidden_token,
    687     :consume_node, :consume_token, :create_node, :end_backtrack,
    688     :end_resync, :enter_alternative, :enter_decision, :enter_rule,
    689     :enter_sub_rule, :error_node, :exit_decision, :exit_rule,
    690     :exit_sub_rule, :flat_node, :location, :look, :mark,
    691     :recognition_exception, :resync, :rewind,
    692     :semantic_predicate, :set_token_boundaries, :terminate
    693   ].freeze
    694 
    695 end
    696 end
    697 end
    698