      1 # begin[licence]
      2 #
      3 #  [The "BSD licence"]
      4 #  Copyright (c) 2005-2009 Terence Parr
      5 #  All rights reserved.
      6 
      7 #  Redistribution and use in source and binary forms, with or without
      8 #  modification, are permitted provided that the following conditions
      9 #  are met:
     10 #  1. Redistributions of source code must retain the above copyright
     11 #     notice, this list of conditions and the following disclaimer.
     12 #  2. Redistributions in binary form must reproduce the above copyright
     13 #     notice, this list of conditions and the following disclaimer in the
     14 #     documentation and/or other materials provided with the distribution.
     15 #  3. The name of the author may not be used to endorse or promote products
     16 #     derived from this software without specific prior written permission.
     17 
     18 #  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19 #  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20 #  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21 #  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22 #  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23 #  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24 #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25 #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27 #  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 #
     29 # end[licence]
     30 
import socket
import sys

from antlr3 import Parser, TokenStream, RecognitionException, Token
from antlr3.tree import CommonTreeAdaptor, TreeAdaptor, Tree
     34 
     35 class DebugParser(Parser):
     36     def __init__(self, stream, state=None, dbg=None, *args, **kwargs):
     37         # wrap token stream in DebugTokenStream (unless user already did so).
     38         if not isinstance(stream, DebugTokenStream):
     39             stream = DebugTokenStream(stream, dbg)
     40 
     41         super(DebugParser, self).__init__(stream, state, *args, **kwargs)
     42 
     43         # Who to notify when events in the parser occur.
     44         self._dbg = None
     45 
     46         self.setDebugListener(dbg)
     47 
     48 
     49     def setDebugListener(self, dbg):
        """Provide a new debug event listener for this parser.  Notify the
        input stream too that it should send events to this listener.
        """
     53 
     54         if hasattr(self.input, 'dbg'):
     55             self.input.dbg = dbg
     56 
     57         self._dbg = dbg
     58 
     59     def getDebugListener(self):
     60         return self._dbg
     61 
     62     dbg = property(getDebugListener, setDebugListener)
     63 
     64 
     65     def beginResync(self):
     66         self._dbg.beginResync()
     67 
     68 
     69     def endResync(self):
     70         self._dbg.endResync()
     71 
     72 
     73     def beginBacktrack(self, level):
     74         self._dbg.beginBacktrack(level)
     75 
     76 
     77     def endBacktrack(self, level, successful):
        self._dbg.endBacktrack(level, successful)
     79 
     80 
     81     def reportError(self, exc):
     82         Parser.reportError(self, exc)
     83 
     84         if isinstance(exc, RecognitionException):
     85             self._dbg.recognitionException(exc)
     86 
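# Usage sketch (illustrative, not part of the runtime): any parser that goes
# through DebugParser.__init__ can be handed a debug event listener at
# construction time; the token stream is wrapped in a DebugTokenStream
# automatically.  "MyDebugParser" and its start rule "expr" are hypothetical
# names standing in for an ANTLR-generated debug parser.
#
#   tokens = CommonTokenStream(lexer)            # any ANTLR3 token stream
#   listener = TraceDebugEventListener()         # defined later in this module
#   parser = MyDebugParser(tokens, dbg=listener)
#   parser.expr()                                # listener events fire as the rule runs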
     87 
     88 class DebugTokenStream(TokenStream):
     89     def __init__(self, input, dbg=None):
     90         self.input = input
     91         self.initialStreamState = True
     92         # Track the last mark() call result value for use in rewind().
     93         self.lastMarker = None
     94 
     95         self._dbg = None
     96         self.setDebugListener(dbg)
     97 
     98         # force TokenStream to get at least first valid token
     99         # so we know if there are any hidden tokens first in the stream
    100         self.input.LT(1)
    101 
    102 
    103     def getDebugListener(self):
    104         return self._dbg
    105 
    106     def setDebugListener(self, dbg):
    107         self._dbg = dbg
    108 
    109     dbg = property(getDebugListener, setDebugListener)
    110 
    111 
    112     def consume(self):
    113         if self.initialStreamState:
    114             self.consumeInitialHiddenTokens()
    115 
    116         a = self.input.index()
    117         t = self.input.LT(1)
    118         self.input.consume()
    119         b = self.input.index()
    120         self._dbg.consumeToken(t)
    121 
        if b > a + 1:
            # then we consumed more than one token; must be off channel tokens
            for idx in range(a + 1, b):
                self._dbg.consumeHiddenToken(self.input.get(idx))
    126 
    127 
    128     def consumeInitialHiddenTokens(self):
    129         """consume all initial off-channel tokens"""
    130 
    131         firstOnChannelTokenIndex = self.input.index()
    132         for idx in range(firstOnChannelTokenIndex):
    133             self._dbg.consumeHiddenToken(self.input.get(idx))
    134 
    135         self.initialStreamState = False
    136 
    137 
    138     def LT(self, i):
    139         if self.initialStreamState:
    140             self.consumeInitialHiddenTokens()
    141 
    142         t = self.input.LT(i)
    143         self._dbg.LT(i, t)
    144         return t
    145 
    146 
    147     def LA(self, i):
    148         if self.initialStreamState:
    149             self.consumeInitialHiddenTokens()
    150 
    151         t = self.input.LT(i)
    152         self._dbg.LT(i, t)
    153         return t.type
    154 
    155 
    156     def get(self, i):
    157         return self.input.get(i)
    158 
    159 
    160     def index(self):
    161         return self.input.index()
    162 
    163 
    164     def mark(self):
    165         self.lastMarker = self.input.mark()
    166         self._dbg.mark(self.lastMarker)
    167         return self.lastMarker
    168 
    169 
    170     def rewind(self, marker=None):
    171         self._dbg.rewind(marker)
    172         self.input.rewind(marker)
    173 
    174 
    175     def release(self, marker):
    176         pass
    177 
    178 
    179     def seek(self, index):
    180         # TODO: implement seek in dbg interface
    181         # self._dbg.seek(index);
    182         self.input.seek(index)
    183 
    184 
    185     def size(self):
    186         return self.input.size()
    187 
    188 
    189     def getTokenSource(self):
    190         return self.input.getTokenSource()
    191 
    192 
    193     def getSourceName(self):
    194         return self.getTokenSource().getSourceName()
    195 
    196 
    197     def toString(self, start=None, stop=None):
    198         return self.input.toString(start, stop)
    199 
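# Usage sketch (illustrative): DebugParser wraps its token stream
# automatically, but a DebugTokenStream can also be built directly to forward
# consume/LT events from any TokenStream to a listener.  "tokens" and
# "listener" below are hypothetical objects.
#
#   dbg_tokens = DebugTokenStream(tokens, listener)
#   dbg_tokens.LA(1)      # fires listener.LT(1, <token>)
#   dbg_tokens.consume()  # fires listener.consumeToken(...), plus one
#                         # consumeHiddenToken(...) per skipped off-channel token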
    200 
    201 class DebugTreeAdaptor(TreeAdaptor):
    202     """A TreeAdaptor proxy that fires debugging events to a DebugEventListener
    203     delegate and uses the TreeAdaptor delegate to do the actual work.  All
    204     AST events are triggered by this adaptor; no code gen changes are needed
    205     in generated rules.  Debugging events are triggered *after* invoking
    206     tree adaptor routines.
    207 
    208     Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})"
    209     cannot be tracked as they might not use the adaptor to create foo, bar.
    210     The debug listener has to deal with tree node IDs for which it did
    211     not see a createNode event.  A single <unknown> node is sufficient even
    212     if it represents a whole tree.
    213     """
    214 
    215     def __init__(self, dbg, adaptor):
    216         self.dbg = dbg
    217         self.adaptor = adaptor
    218 
    219 
    220     def createWithPayload(self, payload):
    221         if payload.getTokenIndex() < 0:
            # could be a token conjured up during error recovery
    223             return self.createFromType(payload.getType(), payload.getText())
    224 
    225         node = self.adaptor.createWithPayload(payload)
    226         self.dbg.createNode(node, payload)
    227         return node
    228 
    229     def createFromToken(self, tokenType, fromToken, text=None):
    230         node = self.adaptor.createFromToken(tokenType, fromToken, text)
    231         self.dbg.createNode(node)
    232         return node
    233 
    234     def createFromType(self, tokenType, text):
    235         node = self.adaptor.createFromType(tokenType, text)
    236         self.dbg.createNode(node)
    237         return node
    238 
    239 
    240     def errorNode(self, input, start, stop, exc):
        node = self.adaptor.errorNode(input, start, stop, exc)
        if node is not None:
            self.dbg.errorNode(node)
    244 
    245         return node
    246 
    247 
    248     def dupTree(self, tree):
    249         t = self.adaptor.dupTree(tree)
        # walk the tree and emit create and add-child events to simulate what
        # dupTree has done.  dupTree does not go through this debug adaptor,
        # so the events must be simulated here.
    253         self.simulateTreeConstruction(t)
    254         return t
    255 
    256 
    257     def simulateTreeConstruction(self, t):
        """^(A B C): emit create A, create B, add child, ..."""
    259         self.dbg.createNode(t)
    260         for i in range(self.adaptor.getChildCount(t)):
    261             child = self.adaptor.getChild(t, i)
    262             self.simulateTreeConstruction(child)
    263             self.dbg.addChild(t, child)
    264 
    265 
    266     def dupNode(self, treeNode):
    267         d = self.adaptor.dupNode(treeNode)
    268         self.dbg.createNode(d)
    269         return d
    270 
    271 
    272     def nil(self):
    273         node = self.adaptor.nil()
    274         self.dbg.nilNode(node)
    275         return node
    276 
    277 
    278     def isNil(self, tree):
    279         return self.adaptor.isNil(tree)
    280 
    281 
    282     def addChild(self, t, child):
    283         if isinstance(child, Token):
    284             n = self.createWithPayload(child)
    285             self.addChild(t, n)
    286 
    287         else:
    288             if t is None or child is None:
    289                 return
    290 
    291             self.adaptor.addChild(t, child)
    292             self.dbg.addChild(t, child)
    293 
    294     def becomeRoot(self, newRoot, oldRoot):
    295         if isinstance(newRoot, Token):
    296             n = self.createWithPayload(newRoot)
    297             self.adaptor.becomeRoot(n, oldRoot)
    298         else:
    299             n = self.adaptor.becomeRoot(newRoot, oldRoot)
    300 
    301         self.dbg.becomeRoot(newRoot, oldRoot)
    302         return n
    303 
    304 
    305     def rulePostProcessing(self, root):
    306         return self.adaptor.rulePostProcessing(root)
    307 
    308 
    309     def getType(self, t):
    310         return self.adaptor.getType(t)
    311 
    312 
    313     def setType(self, t, type):
    314         self.adaptor.setType(t, type)
    315 
    316 
    317     def getText(self, t):
    318         return self.adaptor.getText(t)
    319 
    320 
    321     def setText(self, t, text):
    322         self.adaptor.setText(t, text)
    323 
    324 
    325     def getToken(self, t):
    326         return self.adaptor.getToken(t)
    327 
    328 
    329     def setTokenBoundaries(self, t, startToken, stopToken):
    330         self.adaptor.setTokenBoundaries(t, startToken, stopToken)
    331         if t is not None and startToken is not None and stopToken is not None:
    332             self.dbg.setTokenBoundaries(
    333                 t, startToken.getTokenIndex(),
    334                 stopToken.getTokenIndex())
    335 
    336 
    337     def getTokenStartIndex(self, t):
    338         return self.adaptor.getTokenStartIndex(t)
    339 
    340 
    341     def getTokenStopIndex(self, t):
    342         return self.adaptor.getTokenStopIndex(t)
    343 
    344 
    345     def getChild(self, t, i):
    346         return self.adaptor.getChild(t, i)
    347 
    348 
    349     def setChild(self, t, i, child):
    350         self.adaptor.setChild(t, i, child)
    351 
    352 
    353     def deleteChild(self, t, i):
    354         return self.adaptor.deleteChild(t, i)
    355 
    356 
    357     def getChildCount(self, t):
    358         return self.adaptor.getChildCount(t)
    359 
    360 
    361     def getUniqueID(self, node):
    362         return self.adaptor.getUniqueID(node)
    363 
    364 
    365     def getParent(self, t):
    366         return self.adaptor.getParent(t)
    367 
    368 
    369     def getChildIndex(self, t):
    370         return self.adaptor.getChildIndex(t)
    371 
    372 
    373     def setParent(self, t, parent):
    374         self.adaptor.setParent(t, parent)
    375 
    376 
    377     def setChildIndex(self, t, index):
    378         self.adaptor.setChildIndex(t, index)
    379 
    380 
    381     def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
    382         self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t)
    383 
    384 
    385     ## support
    386 
    387     def getDebugListener(self):
        return self.dbg
    389 
    390     def setDebugListener(self, dbg):
    391         self.dbg = dbg
    392 
    393 
    394     def getTreeAdaptor(self):
    395         return self.adaptor
    396 
    397 
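# Usage sketch (illustrative): wrap a concrete adaptor so that every tree
# construction call also fires an AST debug event.  The listener can be any
# DebugEventListener implementation; the token type 4 is an arbitrary example.
#
#   listener = TraceDebugEventListener()
#   adaptor = DebugTreeAdaptor(listener, CommonTreeAdaptor())
#   root = adaptor.nil()                    # fires listener.nilNode(root)
#   node = adaptor.createFromType(4, "x")   # fires listener.createNode(node)
#   adaptor.addChild(root, node)            # fires listener.addChild(root, node)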
    398 
    399 class DebugEventListener(object):
    400     """All debugging events that a recognizer can trigger.
    401 
    402     I did not create a separate AST debugging interface as it would create
    403     lots of extra classes and DebugParser has a dbg var defined, which makes
    404     it hard to change to ASTDebugEventListener.  I looked hard at this issue
    405     and it is easier to understand as one monolithic event interface for all
    406     possible events.  Hopefully, adding ST debugging stuff won't be bad.  Leave
    407     for future. 4/26/2006.
    408     """
    409 
    410     # Moved to version 2 for v3.1: added grammar name to enter/exit Rule
    411     PROTOCOL_VERSION = "2"
    412 
    413     def enterRule(self, grammarFileName, ruleName):
        """The parser has just entered a rule. No decision has been made about
    415         which alt is predicted.  This is fired AFTER init actions have been
    416         executed.  Attributes are defined and available etc...
    417         The grammarFileName allows composite grammars to jump around among
    418         multiple grammar files.
    419         """
    420 
    421         pass
    422 
    423 
    424     def enterAlt(self, alt):
        """Because rules can have lots of alternatives, it is very useful to
    426         know which alt you are entering.  This is 1..n for n alts.
    427         """
    428         pass
    429 
    430 
    431     def exitRule(self, grammarFileName, ruleName):
        """This is the last thing executed before leaving a rule.  It is
        executed even if an exception is thrown.  This is triggered after
        error reporting and recovery have occurred (unless the exception is
        not caught in this rule).  This implies an "exitAlt" event.
        The grammarFileName allows composite grammars to jump around among
        multiple grammar files.
        """
    439         pass
    440 
    441 
    442     def enterSubRule(self, decisionNumber):
        """Track entry into any (...) subrule or other EBNF construct."""
    444         pass
    445 
    446 
    447     def exitSubRule(self, decisionNumber):
    448         pass
    449 
    450 
    451     def enterDecision(self, decisionNumber, couldBacktrack):
        """Every decision, fixed k or arbitrary, has an enter/exit event
    453         so that a GUI can easily track what LT/consume events are
    454         associated with prediction.  You will see a single enter/exit
    455         subrule but multiple enter/exit decision events, one for each
    456         loop iteration.
    457         """
    458         pass
    459 
    460 
    461     def exitDecision(self, decisionNumber):
    462         pass
    463 
    464 
    465     def consumeToken(self, t):
        """An input token was consumed; matched by any kind of element.
        Trigger after the token was matched by things like match(), matchAny().
        """
    469         pass
    470 
    471 
    472     def consumeHiddenToken(self, t):
        """An off-channel input token was consumed.
        Trigger after the token was matched by things like match(), matchAny()
        (unless of course the hidden token is the first thing in the input
        stream).
        """
    477         pass
    478 
    479 
    480     def LT(self, i, t):
        """Somebody (anybody) looked ahead.  Note that this actually gets
        triggered by both LA and LT calls.  The debugger will want to know
        which Token object was examined.  Like consumeToken, this indicates
        what token was seen at that depth.  A remote debugger cannot look
        ahead into a file it doesn't have, so LT events must pass the token
        even if the info is redundant.
        """
    488         pass
    489 
    490 
    491     def mark(self, marker):
        """The parser is going to look arbitrarily far ahead; mark this
        location.  The token stream's marker is sent in case you need it.
        """
    495         pass
    496 
    497 
    498     def rewind(self, marker=None):
        """After an arbitrarily long lookahead as with a cyclic DFA (or with
        any backtrack), this informs the debugger that the stream should be
        rewound to the position associated with marker.
        """
    504         pass
    505 
    506 
    507     def beginBacktrack(self, level):
    508         pass
    509 
    510 
    511     def endBacktrack(self, level, successful):
    512         pass
    513 
    514 
    515     def location(self, line, pos):
        """To watch a parser move through the grammar, the parser needs to
        inform the debugger what line/charPos it is passing in the grammar.
        For now, this does not know how to switch from one grammar to the
        other and back for island grammars etc...

        This should also allow breakpoints because the debugger can stop
        the parser whenever it hits this line/pos.
        """
    524         pass
    525 
    526 
    527     def recognitionException(self, e):
        """A recognition exception occurred such as NoViableAltException.  I made
        this a generic event so that I can alter the exception hierarchy later
        without having to alter all the debug objects.
    531 
        Upon error, the stack of enter rule/subrule must be properly unwound.
        If no viable alt occurs, it is within an enter/exit decision, which
        also must be rewound.  Even the rewind for each mark must be unwound.
    535         In the Java target this is pretty easy using try/finally, if a bit
    536         ugly in the generated code.  The rewind is generated in DFA.predict()
    537         actually so no code needs to be generated for that.  For languages
    538         w/o this "finally" feature (C++?), the target implementor will have
    539         to build an event stack or something.
    540 
    541         Across a socket for remote debugging, only the RecognitionException
    542         data fields are transmitted.  The token object or whatever that
    543         caused the problem was the last object referenced by LT.  The
    544         immediately preceding LT event should hold the unexpected Token or
    545         char.
    546 
    547         Here is a sample event trace for grammar:
    548 
    549         b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
    550           | D
    551           ;
    552 
    553         The sequence for this rule (with no viable alt in the subrule) for
    554         input 'c c' (there are 3 tokens) is:
    555 
    556 		commence
    557 		LT(1)
    558 		enterRule b
    559 		location 7 1
    560 		enter decision 3
    561 		LT(1)
    562 		exit decision 3
    563 		enterAlt1
    564 		location 7 5
    565 		LT(1)
    566 		consumeToken [c/<4>,1:0]
    567 		location 7 7
    568 		enterSubRule 2
    569 		enter decision 2
    570 		LT(1)
    571 		LT(1)
    572 		recognitionException NoViableAltException 2 1 2
    573 		exit decision 2
    574 		exitSubRule 2
    575 		beginResync
    576 		LT(1)
    577 		consumeToken [c/<4>,1:1]
    578 		LT(1)
    579 		endResync
    580 		LT(-1)
    581 		exitRule b
    582 		terminate
        """
    584         pass
    585 
    586 
    587     def beginResync(self):
        """Indicates the recognizer is about to consume tokens to resynchronize
    589         the parser.  Any consume events from here until the recovered event
    590         are not part of the parse--they are dead tokens.
    591         """
    592         pass
    593 
    594 
    595     def endResync(self):
        """Indicates that the recognizer has finished consuming tokens in order
        to resynchronize.  There may be multiple beginResync/endResync pairs
        before the recognizer comes out of errorRecovery mode (in which
        multiple errors are suppressed).  This will be useful in a GUI where
        you probably want to grey out tokens that are consumed but not matched
        to anything in the grammar.  Anything between a beginResync/endResync
        pair was tossed out by the parser.
        """
    604         pass
    605 
    606 
    607     def semanticPredicate(self, result, predicate):
        """A semantic predicate was evaluated with this result and action text."""
    609         pass
    610 
    611 
    612     def commence(self):
        """Announce that parsing has begun.  Not technically useful except for
        sending events over a socket.  A GUI for example will launch a thread
        to connect and communicate with a remote parser.  The thread will want
        to notify the GUI when a connection is made.  ANTLR parsers
        trigger this upon entry to the first rule (the ruleLevel is used to
        figure this out).
        """
    620         pass
    621 
    622 
    623     def terminate(self):
    624         """Parsing is over; successfully or not.  Mostly useful for telling
    625         remote debugging listeners that it's time to quit.  When the rule
    626         invocation level goes to zero at the end of a rule, we are done
    627         parsing.
        """
    629         pass
    630 
    631 
    632     ## T r e e  P a r s i n g
    633 
    634     def consumeNode(self, t):
    635         """Input for a tree parser is an AST, but we know nothing for sure
    636         about a node except its type and text (obtained from the adaptor).
    637         This is the analog of the consumeToken method.  Again, the ID is
    638         the hashCode usually of the node so it only works if hashCode is
    639         not implemented.  If the type is UP or DOWN, then
    640         the ID is not really meaningful as it's fixed--there is
    641         just one UP node and one DOWN navigation node.
    642         """
    643         pass
    644 
    645 
    646     def LT(self, i, t):
        """The tree parser looked ahead.  If the type is UP or DOWN,
        then the ID is not really meaningful as it's fixed--there is
        just one UP node and one DOWN navigation node.
        """
    651         pass
    652 
    653 
    654 
    655     ## A S T  E v e n t s
    656 
    657     def nilNode(self, t):
        """A nil was created (even nil nodes have a unique ID...
    659         they are not "null" per se).  As of 4/28/2006, this
    660         seems to be uniquely triggered when starting a new subtree
    661         such as when entering a subrule in automatic mode and when
    662         building a tree in rewrite mode.
    663 
    664         If you are receiving this event over a socket via
    665         RemoteDebugEventSocketListener then only t.ID is set.
        """
    667         pass
    668 
    669 
    670     def errorNode(self, t):
        """Upon syntax error, recognizers bracket the error with an error node
    672         if they are building ASTs.
    673         """
    674         pass
    675 
    676 
    677     def createNode(self, node, token=None):
        """Announce a new node built from token elements such as type etc...
    679 
    680         If you are receiving this event over a socket via
    681         RemoteDebugEventSocketListener then only t.ID, type, text are
    682         set.
        """
    684         pass
    685 
    686 
    687     def becomeRoot(self, newRoot, oldRoot):
        """Make a node the new root of an existing root.
    689 
    690         Note: the newRootID parameter is possibly different
    691         than the TreeAdaptor.becomeRoot() newRoot parameter.
    692         In our case, it will always be the result of calling
    693         TreeAdaptor.becomeRoot() and not root_n or whatever.
    694 
    695         The listener should assume that this event occurs
    696         only when the current subrule (or rule) subtree is
    697         being reset to newRootID.
    698 
    699         If you are receiving this event over a socket via
    700         RemoteDebugEventSocketListener then only IDs are set.
    701 
    702         @see antlr3.tree.TreeAdaptor.becomeRoot()
        """
    704         pass
    705 
    706 
    707     def addChild(self, root, child):
        """Make childID a child of rootID.
    709 
    710         If you are receiving this event over a socket via
    711         RemoteDebugEventSocketListener then only IDs are set.
    712 
    713         @see antlr3.tree.TreeAdaptor.addChild()
    714         """
    715         pass
    716 
    717 
    718     def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        """Set the token start/stop token index for a subtree root or node.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID is set.
        """
    724         pass
    725 
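# Usage sketch (illustrative): a concrete listener only needs to override the
# events it cares about; everything else falls back to the no-op methods
# defined above.  "RuleTracer" is a hypothetical example class.
#
#   class RuleTracer(DebugEventListener):
#       def __init__(self):
#           self.depth = 0
#
#       def enterRule(self, grammarFileName, ruleName):
#           sys.stdout.write("%s> %s\n" % ("  " * self.depth, ruleName))
#           self.depth += 1
#
#       def exitRule(self, grammarFileName, ruleName):
#           self.depth -= 1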
    726 
    727 class BlankDebugEventListener(DebugEventListener):
    """A blank listener that does nothing; useful for real classes so they
    don't have to implement lots of empty methods and are less sensitive to
    updates to the debug interface.
    731 
    732     Note: this class is identical to DebugEventListener and exists purely
    733     for compatibility with Java.
    734     """
    735     pass
    736 
    737 
    738 class TraceDebugEventListener(DebugEventListener):
    739     """A listener that simply records text representations of the events.
    740 
    741     Useful for debugging the debugging facility ;)
    742 
    743     Subclasses can override the record() method (which defaults to printing to
    744     stdout) to record the events in a different way.
    745     """
    746 
    747     def __init__(self, adaptor=None):
    748         super(TraceDebugEventListener, self).__init__()
    749 
    750         if adaptor is None:
    751             adaptor = CommonTreeAdaptor()
    752         self.adaptor = adaptor
    753 
    754     def record(self, event):
    755         sys.stdout.write(event + '\n')
    756 
    757     def enterRule(self, grammarFileName, ruleName):
    758         self.record("enterRule "+ruleName)
    759 
    760     def exitRule(self, grammarFileName, ruleName):
    761         self.record("exitRule "+ruleName)
    762 
    763     def enterSubRule(self, decisionNumber):
    764         self.record("enterSubRule")
    765 
    766     def exitSubRule(self, decisionNumber):
    767         self.record("exitSubRule")
    768 
    769     def location(self, line, pos):
    770         self.record("location %s:%s" % (line, pos))
    771 
    772     ## Tree parsing stuff
    773 
    774     def consumeNode(self, t):
    775         self.record("consumeNode %s %s %s" % (
    776                 self.adaptor.getUniqueID(t),
    777                 self.adaptor.getText(t),
    778                 self.adaptor.getType(t)))
    779 
    780     def LT(self, i, t):
    781         self.record("LT %s %s %s %s" % (
    782                 i,
    783                 self.adaptor.getUniqueID(t),
    784                 self.adaptor.getText(t),
    785                 self.adaptor.getType(t)))
    786 
    787 
    788     ## AST stuff
    789     def nilNode(self, t):
    790         self.record("nilNode %s" % self.adaptor.getUniqueID(t))
    791 
    792     def createNode(self, t, token=None):
    793         if token is None:
    794             self.record("create %s: %s, %s" % (
    795                     self.adaptor.getUniqueID(t),
    796                     self.adaptor.getText(t),
    797                     self.adaptor.getType(t)))
    798 
    799         else:
    800             self.record("create %s: %s" % (
    801                     self.adaptor.getUniqueID(t),
    802                     token.getTokenIndex()))
    803 
    804     def becomeRoot(self, newRoot, oldRoot):
    805         self.record("becomeRoot %s, %s" % (
    806                 self.adaptor.getUniqueID(newRoot),
    807                 self.adaptor.getUniqueID(oldRoot)))
    808 
    809     def addChild(self, root, child):
    810         self.record("addChild %s, %s" % (
    811                 self.adaptor.getUniqueID(root),
    812                 self.adaptor.getUniqueID(child)))
    813 
    814     def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
    815         self.record("setTokenBoundaries %s, %s, %s" % (
    816                 self.adaptor.getUniqueID(t),
    817                 tokenStartIndex, tokenStopIndex))
    818 
    819 
    820 class RecordDebugEventListener(TraceDebugEventListener):
    821     """A listener that records events as strings in an array."""
    822 
    823     def __init__(self, adaptor=None):
    824         super(RecordDebugEventListener, self).__init__(adaptor)
    825 
    826         self.events = []
    827 
    828     def record(self, event):
    829         self.events.append(event)
    830 
    831 
    832 class DebugEventSocketProxy(DebugEventListener):
    """A proxy debug event listener that forwards events over a socket to
    a debugger (or any other listener) using a simple text-based protocol;
    one event per line.  ANTLRWorks listens on a server socket with a
    RemoteDebugEventSocketListener instance.  These two objects must therefore
    be kept in sync.  New events must be handled on both sides of the socket.
    838     """
    839 
    840     DEFAULT_DEBUGGER_PORT = 49100
    841 
    842     def __init__(self, recognizer, adaptor=None, port=None,
    843                  debug=None):
    844         super(DebugEventSocketProxy, self).__init__()
    845 
    846         self.grammarFileName = recognizer.getGrammarFileName()
    847 
        # Almost certainly the recognizer will have an adaptor set, but
        # we don't know how to cast it (Parser or TreeParser) to get at
        # the adaptor field.  It must be passed in via the constructor. :(
    851         self.adaptor = adaptor
    852 
    853         self.port = port or self.DEFAULT_DEBUGGER_PORT
    854 
    855         self.debug = debug
    856 
    857         self.socket = None
    858         self.connection = None
    859         self.input = None
    860         self.output = None
    861 
    862 
    863     def log(self, msg):
    864         if self.debug is not None:
    865             self.debug.write(msg + '\n')
    866 
    867 
    868     def handshake(self):
    869         if self.socket is None:
    870             # create listening socket
    871             self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    872             self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    873             self.socket.bind(('', self.port))
    874             self.socket.listen(1)
    875             self.log("Waiting for incoming connection on port %d" % self.port)
    876 
    877             # wait for an incoming connection
    878             self.connection, addr = self.socket.accept()
    879             self.log("Accepted connection from %s:%d" % addr)
    880 
    881             self.connection.setblocking(1)
    882             self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
    883 
    884             # FIXME(pink): wrap into utf8 encoding stream
    885             self.output = self.connection.makefile('w', 0)
    886             self.input = self.connection.makefile('r', 0)
    887 
    888             self.write("ANTLR %s" % self.PROTOCOL_VERSION)
    889             self.write("grammar \"%s" % self.grammarFileName)
    890             self.ack()
    891 
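    # Illustrative handshake trace (assuming the remote side acknowledges each
    # line, as ANTLRWorks' RemoteDebugEventSocketListener does).  Lines written
    # by the proxy are prefixed with ">", replies with "<"; "T.g" is a
    # hypothetical grammar file name:
    #
    #   > ANTLR 2
    #   < (ack)
    #   > grammar "T.g
    #   < (ack)
    #
    # After the handshake, every debug event is transmitted the same way: one
    # tab-separated line followed by an acknowledgement (see transmit()).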
    892 
    893     def write(self, msg):
    894         self.log("> %s" % msg)
    895         self.output.write("%s\n" % msg)
    896         self.output.flush()
    897 
    898 
    899     def ack(self):
    900         t = self.input.readline()
    901         self.log("< %s" % t.rstrip())
    902 
    903 
    904     def transmit(self, event):
        self.write(event)
        self.ack()
    907 
    908 
    909     def commence(self):
    910         # don't bother sending event; listener will trigger upon connection
    911         pass
    912 
    913 
    914     def terminate(self):
    915         self.transmit("terminate")
    916         self.output.close()
    917         self.input.close()
    918         self.connection.close()
    919         self.socket.close()
    920 
    921 
    922     def enterRule(self, grammarFileName, ruleName):
    923         self.transmit("enterRule\t%s\t%s" % (grammarFileName, ruleName))
    924 
    925 
    926     def enterAlt(self, alt):
    927         self.transmit("enterAlt\t%d" % alt)
    928 
    929 
    930     def exitRule(self, grammarFileName, ruleName):
    931         self.transmit("exitRule\t%s\t%s" % (grammarFileName, ruleName))
    932 
    933 
    934     def enterSubRule(self, decisionNumber):
    935         self.transmit("enterSubRule\t%d" % decisionNumber)
    936 
    937 
    938     def exitSubRule(self, decisionNumber):
    939         self.transmit("exitSubRule\t%d" % decisionNumber)
    940 
    941 
    942     def enterDecision(self, decisionNumber, couldBacktrack):
    943         self.transmit(
    944             "enterDecision\t%d\t%d" % (decisionNumber, couldBacktrack))
    945 
    946 
    947     def exitDecision(self, decisionNumber):
    948         self.transmit("exitDecision\t%d" % decisionNumber)
    949 
    950 
    951     def consumeToken(self, t):
    952         self.transmit("consumeToken\t%s" % self.serializeToken(t))
    953 
    954 
    955     def consumeHiddenToken(self, t):
    956         self.transmit("consumeHiddenToken\t%s" % self.serializeToken(t))
    957 
    958 
    959     def LT(self, i, o):
    960         if isinstance(o, Tree):
    961             return self.LT_tree(i, o)
    962         return self.LT_token(i, o)
    963 
    964 
    965     def LT_token(self, i, t):
    966         if t is not None:
    967             self.transmit("LT\t%d\t%s" % (i, self.serializeToken(t)))
    968 
    969 
    970     def mark(self, i):
    971         self.transmit("mark\t%d" % i)
    972 
    973 
    974     def rewind(self, i=None):
    975         if i is not None:
    976             self.transmit("rewind\t%d" % i)
    977         else:
    978             self.transmit("rewind")
    979 
    980 
    981     def beginBacktrack(self, level):
    982         self.transmit("beginBacktrack\t%d" % level)
    983 
    984 
    985     def endBacktrack(self, level, successful):
    986         self.transmit("endBacktrack\t%d\t%s" % (
    987                 level, ['0', '1'][bool(successful)]))
    988 
    989 
    990     def location(self, line, pos):
    991         self.transmit("location\t%d\t%d" % (line, pos))
    992 
    993 
    994     def recognitionException(self, exc):
    995         self.transmit('\t'.join([
    996                     "exception",
    997                     exc.__class__.__name__,
    998                     str(int(exc.index)),
    999                     str(int(exc.line)),
   1000                     str(int(exc.charPositionInLine))]))
   1001 
   1002 
   1003     def beginResync(self):
   1004         self.transmit("beginResync")
   1005 
   1006 
   1007     def endResync(self):
   1008         self.transmit("endResync")
   1009 
   1010 
   1011     def semanticPredicate(self, result, predicate):
   1012         self.transmit('\t'.join([
   1013                     "semanticPredicate",
   1014                     str(int(result)),
   1015                     self.escapeNewlines(predicate)]))
   1016 
   1017     ## A S T  P a r s i n g  E v e n t s
   1018 
    def consumeNode(self, t):
        # Ported from the commented-out Java reference implementation that
        # used to live here: serialize the node fields and transmit the event.
        buf = ['consumeNode']
        self.serializeNode(buf, t)
        self.transmit(''.join(buf))
   1025 
   1026 
    def LT_tree(self, i, t):
        # Ported from the commented-out Java reference implementation.
        # "LN" = lookahead node; distinguishes tree lookahead from LT in
        # the protocol.
        buf = ['LN\t%d' % i]
        self.serializeNode(buf, t)
        self.transmit(''.join(buf))
   1037 
   1038 
    def serializeNode(self, buf, t):
        # Ported from the commented-out Java reference implementation:
        # append the node's unique ID, type, line, char position and token
        # start index, followed by its escaped text, to the field list.
        node_id = self.adaptor.getUniqueID(t)
        node_type = self.adaptor.getType(t)
        buf.append("\t%d\t%d" % (node_id, node_type))

        line = -1
        pos = -1
        token = self.adaptor.getToken(t)
        if token is not None:
            line = token.getLine()
            pos = token.getCharPositionInLine()
        buf.append("\t%d\t%d" % (line, pos))

        tokenIndex = self.adaptor.getTokenStartIndex(t)
        buf.append("\t%d" % tokenIndex)

        buf.append("\t\"%s" % self.escapeNewlines(self.adaptor.getText(t)))
   1063 
   1064 
   1065     ## A S T  E v e n t s
   1066 
   1067     def nilNode(self, t):
   1068         self.transmit("nilNode\t%d" % self.adaptor.getUniqueID(t))
   1069 
   1070 
   1071     def errorNode(self, t):
   1072         self.transmit("errorNode\t%d\t%d\t\"%s" % (
   1073              self.adaptor.getUniqueID(t),
   1074              Token.INVALID_TOKEN_TYPE,
   1075              self.escapeNewlines(t.toString())))
   1076 
   1077 
   1078 
   1079     def createNode(self, node, token=None):
   1080         if token is not None:
   1081             self.transmit("createNode\t%d\t%d" % (
   1082                     self.adaptor.getUniqueID(node),
   1083                     token.getTokenIndex()))
   1084 
   1085         else:
   1086             self.transmit("createNodeFromTokenElements\t%d\t%d\t\"%s" % (
   1087                     self.adaptor.getUniqueID(node),
   1088                     self.adaptor.getType(node),
   1089                     self.adaptor.getText(node)))
   1090 
   1091 
   1092     def becomeRoot(self, newRoot, oldRoot):
   1093         self.transmit("becomeRoot\t%d\t%d" % (
   1094                 self.adaptor.getUniqueID(newRoot),
   1095                 self.adaptor.getUniqueID(oldRoot)))
   1096 
   1097 
   1098     def addChild(self, root, child):
   1099         self.transmit("addChild\t%d\t%d" % (
   1100                 self.adaptor.getUniqueID(root),
   1101                 self.adaptor.getUniqueID(child)))
   1102 
   1103 
   1104     def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
   1105         self.transmit("setTokenBoundaries\t%d\t%d\t%d" % (
   1106                 self.adaptor.getUniqueID(t),
   1107                 tokenStartIndex, tokenStopIndex))
   1108 
   1109 
   1110 
   1111     ## support
   1112 
   1113     def setTreeAdaptor(self, adaptor):
   1114         self.adaptor = adaptor
   1115 
   1116     def getTreeAdaptor(self):
   1117         return self.adaptor
   1118 
   1119 
   1120     def serializeToken(self, t):
   1121         buf = [str(int(t.getTokenIndex())),
   1122                str(int(t.getType())),
   1123                str(int(t.getChannel())),
   1124                str(int(t.getLine() or 0)),
   1125                str(int(t.getCharPositionInLine() or 0)),
   1126                '\"' + self.escapeNewlines(t.getText())]
   1127         return '\t'.join(buf)
   1128 
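    # Illustrative example: for a hypothetical token 'c' with type 4 on the
    # default channel at index 0, line 1, column 0, serializeToken() returns
    # the tab-separated fields
    #
    #   0\t4\t0\t1\t0\t"c
    #
    # so consumeToken() above transmits 'consumeToken\t0\t4\t0\t1\t0\t"c'.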
   1129 
   1130     def escapeNewlines(self, txt):
   1131         if txt is None:
   1132             return ''
   1133 
   1134         txt = txt.replace("%","%25")   # escape all escape char ;)
   1135         txt = txt.replace("\n","%0A")  # escape \n
   1136         txt = txt.replace("\r","%0D")  # escape \r
   1137         return txt
   1138
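
# Usage sketch (illustrative): wiring a socket proxy by hand.  A parser
# generated with ANTLR's -debug option normally creates and connects its own
# proxy, so "parser", its adaptor and the start rule below are hypothetical.
#
#   proxy = DebugEventSocketProxy(parser, adaptor=parser.getTreeAdaptor())
#   proxy.handshake()           # blocks until ANTLRWorks (or another
#                               # RemoteDebugEventSocketListener) connects
#   parser.setDebugListener(proxy)
#   parser.start_rule()         # events stream over the socket, one per line
#   proxy.terminate()           # send "terminate" and close the connection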