# Home | History | Annotate | Download | only in antlr3
      1 """ANTLR3 runtime package"""
      2 
      3 # begin[licence]
      4 #
      5 # [The "BSD licence"]
      6 # Copyright (c) 2005-2008 Terence Parr
      7 # All rights reserved.
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 3. The name of the author may not be used to endorse or promote products
     18 #    derived from this software without specific prior written permission.
     19 #
     20 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     21 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     22 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     23 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     24 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     25 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     29 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 # end[licence]
     32 
     33 import codecs
     34 from StringIO import StringIO
     35 
     36 from antlr3.constants import DEFAULT_CHANNEL, EOF
     37 from antlr3.tokens import Token, CommonToken
     38 
     39 
     40 ############################################################################
     41 #
     42 # basic interfaces
     43 #   IntStream
     44 #    +- CharStream
     45 #    \- TokenStream
     46 #
     47 # subclasses must implemented all methods
     48 #
     49 ############################################################################
     50 
class IntStream(object):
    """
    @brief Base interface for streams of integer values.

    A simple stream of integers used when all I care about is the char
    or token type sequence (such as interpretation).

    This is an abstract interface; subclasses must implement all methods.
    """

    def consume(self):
        """Advance the input cursor by one symbol."""

        raise NotImplementedError


    def LA(self, i):
        """Get int at current input pointer + i ahead where i=1 is next int.

        Negative indexes are allowed.  LA(-1) is previous token (token
        just matched).  LA(-i) where i is before first token should
        yield -1, invalid char / EOF.
        """

        raise NotImplementedError


    def mark(self):
        """
        Tell the stream to start buffering if it hasn't already.  Return
        current input position, index(), or some other marker so that
        when passed to rewind() you get back to the same spot.
        rewind(mark()) should not affect the input cursor.  The Lexer
        track line/col info as well as input index so its markers are
        not pure input indexes.  Same for tree node streams.
        """

        raise NotImplementedError


    def index(self):
        """
        Return the current input symbol index 0..n where n indicates the
        last symbol has been read.  The index is the symbol about to be
        read not the most recently read symbol.
        """

        raise NotImplementedError


    def rewind(self, marker=None):
        """
        Reset the stream so that next call to index would return marker.
        The marker will usually be index() but it doesn't have to be.  It's
        just a marker to indicate what state the stream was in.  This is
        essentially calling release() and seek().  If there are markers
        created after this marker argument, this routine must unroll them
        like a stack.  Assume the state the stream was in when this marker
        was created.

        If marker is None:
        Rewind to the input position of the last marker.
        Used currently only after a cyclic DFA and just
        before starting a sem/syn predicate to get the
        input position back to the start of the decision.
        Do not "pop" the marker off the state.  mark(i)
        and rewind(i) should balance still. It is
        like invoking rewind(last marker) but it should not "pop"
        the marker off.  It's like seek(last marker's input position).
        """

        raise NotImplementedError


    def release(self, marker=None):
        """
        You may want to commit to a backtrack but don't want to force the
        stream to keep bookkeeping objects around for a marker that is
        no longer necessary.  This will have the same behavior as
        rewind() except it releases resources without the backward seek.
        This must throw away resources for all markers back to the marker
        argument.  So if you're nested 5 levels of mark(), and then release(2)
        you have to release resources for depths 2..5.
        """

        raise NotImplementedError


    def seek(self, index):
        """
        Set the input cursor to the position indicated by index.  This is
        normally used to seek ahead in the input stream.  No buffering is
        required to do this unless you know your stream will use seek to
        move backwards such as when backtracking.

        This is different from rewind in its multi-directional
        requirement and in that its argument is strictly an input cursor
        (index).

        For char streams, seeking forward must update the stream state such
        as line number.  For seeking backwards, you will be presumably
        backtracking using the mark/rewind mechanism that restores state and
        so this method does not need to update state when seeking backwards.

        Currently, this method is only used for efficient backtracking using
        memoization, but in the future it may be used for incremental parsing.

        The index is 0..n-1.  A seek to position i means that LA(1) will
        return the ith symbol.  So, seeking to 0 means LA(1) will return the
        first element in the stream.
        """

        raise NotImplementedError


    def size(self):
        """
        Only makes sense for streams that buffer everything up probably, but
        might be useful to display the entire stream or for testing.  This
        value includes a single EOF.
        """

        raise NotImplementedError


    def getSourceName(self):
        """
        Where are you getting symbols from?  Normally, implementations will
        pass the buck all the way to the lexer who can ask its input stream
        for the file name or whatever.
        """

        raise NotImplementedError
    180 
    181 
class CharStream(IntStream):
    """
    @brief A source of characters for an ANTLR lexer.

    This is an abstract class that must be implemented by a subclass.

    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    # Sentinel value returned by LA/LT when reading past the end of input.
    EOF = -1


    def substring(self, start, stop):
        """
        For infinite streams, you don't need this; primarily I'm providing
        a useful interface for action code.  Just make sure actions don't
        use this on streams that don't support it.
        """

        raise NotImplementedError


    def LT(self, i):
        """
        Get the ith character of lookahead.  This is the same usually as
        LA(i).  This will be used for labels in the generated
        lexer code.  I'd prefer to return a char here type-wise, but it's
        probably better to be 32-bit clean and be consistent with LA.
        """

        raise NotImplementedError


    def getLine(self):
        """ANTLR tracks the line information automatically"""

        raise NotImplementedError


    def setLine(self, line):
        """
        Because this stream can rewind, we need to be able to reset the line
        """

        raise NotImplementedError


    def getCharPositionInLine(self):
        """
        The index of the character relative to the beginning of the line 0..n-1
        """

        raise NotImplementedError


    def setCharPositionInLine(self, pos):
        """
        Because this stream can rewind, we need to be able to reset the
        column position within the current line.
        """

        raise NotImplementedError
    241 
    242 
class TokenStream(IntStream):
    """

    @brief A stream of tokens accessing tokens from a TokenSource

    This is an abstract class that must be implemented by a subclass.

    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    def LT(self, k):
        """
        Get Token at current input pointer + i ahead where i=1 is next Token.
        i<0 indicates tokens in the past.  So -1 is previous token and -2 is
        two tokens ago. LT(0) is undefined.  For i>=n, return Token.EOFToken.
        Return null for LT(0) and any index that results in an absolute address
        that is negative.
        """

        raise NotImplementedError


    def range(self):
        """
        How far ahead has the stream been asked to look?  The return
        value is a valid index from 0..n-1.
        """

        raise NotImplementedError


    def get(self, i):
        """
        Get a token at an absolute index i; 0..n-1.  This is really only
        needed for profiling and debugging and token stream rewriting.
        If you don't want to buffer up tokens, then this method makes no
        sense for you.  Naturally you can't use the rewrite stream feature.
        I believe DebugTokenStream can easily be altered to not use
        this method, removing the dependency.
        """

        raise NotImplementedError


    def getTokenSource(self):
        """
        Where is this stream pulling tokens from?  This is not the name, but
        the object that provides Token objects.
        """

        raise NotImplementedError


    def toString(self, start=None, stop=None):
        """
        Return the text of all tokens from start to stop, inclusive.
        If the stream does not buffer all the tokens then it can just
        return "" or null;  Users should not access $ruleLabel.text in
        an action of course in that case.

        Because the user is not required to use a token with an index stored
        in it, we must provide a means for two token objects themselves to
        indicate the start/end location.  Most often this will just delegate
        to the other toString(int,int).  This is also parallel with
        the TreeNodeStream.toString(Object,Object).
        """

        raise NotImplementedError
    313 
    314 
    315 ############################################################################
    316 #
    317 # character streams for use in lexers
    318 #   CharStream
    319 #   \- ANTLRStringStream
    320 #
    321 ############################################################################
    322 
    323 
    324 class ANTLRStringStream(CharStream):
    325     """
    326     @brief CharStream that pull data from a unicode string.
    327 
    328     A pretty quick CharStream that pulls all data from an array
    329     directly.  Every method call counts in the lexer.
    330 
    331     """
    332 
    333 
    334     def __init__(self, data):
    335         """
    336         @param data This should be a unicode string holding the data you want
    337            to parse. If you pass in a byte string, the Lexer will choke on
    338            non-ascii data.
    339 
    340         """
    341 
    342         CharStream.__init__(self)
    343 
    344   	# The data being scanned
    345         self.strdata = unicode(data)
    346         self.data = [ord(c) for c in self.strdata]
    347 
    348 	# How many characters are actually in the buffer
    349         self.n = len(data)
    350 
    351  	# 0..n-1 index into string of next char
    352         self.p = 0
    353 
    354 	# line number 1..n within the input
    355         self.line = 1
    356 
    357  	# The index of the character relative to the beginning of the
    358         # line 0..n-1
    359         self.charPositionInLine = 0
    360 
    361 	# A list of CharStreamState objects that tracks the stream state
    362         # values line, charPositionInLine, and p that can change as you
    363         # move through the input stream.  Indexed from 0..markDepth-1.
    364         self._markers = [ ]
    365         self.lastMarker = None
    366         self.markDepth = 0
    367 
    368         # What is name or source of this char stream?
    369         self.name = None
    370 
    371 
    372     def reset(self):
    373         """
    374         Reset the stream so that it's in the same state it was
    375         when the object was created *except* the data array is not
    376         touched.
    377         """
    378 
    379         self.p = 0
    380         self.line = 1
    381         self.charPositionInLine = 0
    382         self._markers = [ ]
    383 
    384 
    385     def consume(self):
    386         try:
    387             if self.data[self.p] == 10: # \n
    388                 self.line += 1
    389                 self.charPositionInLine = 0
    390             else:
    391                 self.charPositionInLine += 1
    392 
    393             self.p += 1
    394 
    395         except IndexError:
    396             # happend when we reached EOF and self.data[self.p] fails
    397             # just do nothing
    398             pass
    399 
    400 
    401 
    402     def LA(self, i):
    403         if i == 0:
    404             return 0 # undefined
    405 
    406         if i < 0:
    407             i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
    408 
    409         try:
    410             return self.data[self.p+i-1]
    411         except IndexError:
    412             return EOF
    413 
    414 
    415 
    416     def LT(self, i):
    417         if i == 0:
    418             return 0 # undefined
    419 
    420         if i < 0:
    421             i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
    422 
    423         try:
    424             return self.strdata[self.p+i-1]
    425         except IndexError:
    426             return EOF
    427 
    428 
    429     def index(self):
    430         """
    431         Return the current input symbol index 0..n where n indicates the
    432         last symbol has been read.  The index is the index of char to
    433         be returned from LA(1).
    434         """
    435 
    436         return self.p
    437 
    438 
    439     def size(self):
    440         return self.n
    441 
    442 
    443     def mark(self):
    444         state = (self.p, self.line, self.charPositionInLine)
    445         try:
    446             self._markers[self.markDepth] = state
    447         except IndexError:
    448             self._markers.append(state)
    449         self.markDepth += 1
    450 
    451         self.lastMarker = self.markDepth
    452 
    453         return self.lastMarker
    454 
    455 
    456     def rewind(self, marker=None):
    457         if marker is None:
    458             marker = self.lastMarker
    459 
    460         p, line, charPositionInLine = self._markers[marker-1]
    461 
    462         self.seek(p)
    463         self.line = line
    464         self.charPositionInLine = charPositionInLine
    465         self.release(marker)
    466 
    467 
    468     def release(self, marker=None):
    469         if marker is None:
    470             marker = self.lastMarker
    471 
    472         self.markDepth = marker-1
    473 
    474 
    475     def seek(self, index):
    476         """
    477         consume() ahead until p==index; can't just set p=index as we must
    478         update line and charPositionInLine.
    479         """
    480 
    481         if index <= self.p:
    482             self.p = index # just jump; don't update stream state (line, ...)
    483             return
    484 
    485         # seek forward, consume until p hits index
    486         while self.p < index:
    487             self.consume()
    488 
    489 
    490     def substring(self, start, stop):
    491         return self.strdata[start:stop+1]
    492 
    493 
    494     def getLine(self):
    495         """Using setter/getter methods is deprecated. Use o.line instead."""
    496         return self.line
    497 
    498 
    499     def getCharPositionInLine(self):
    500         """
    501         Using setter/getter methods is deprecated. Use o.charPositionInLine
    502         instead.
    503         """
    504         return self.charPositionInLine
    505 
    506 
    507     def setLine(self, line):
    508         """Using setter/getter methods is deprecated. Use o.line instead."""
    509         self.line = line
    510 
    511 
    512     def setCharPositionInLine(self, pos):
    513         """
    514         Using setter/getter methods is deprecated. Use o.charPositionInLine
    515         instead.
    516         """
    517         self.charPositionInLine = pos
    518 
    519 
    520     def getSourceName(self):
    521         return self.name
    522 
    523 
    524 class ANTLRFileStream(ANTLRStringStream):
    525     """
    526     @brief CharStream that opens a file to read the data.
    527 
    528     This is a char buffer stream that is loaded from a file
    529     all at once when you construct the object.
    530     """
    531 
    532     def __init__(self, fileName, encoding=None):
    533         """
    534         @param fileName The path to the file to be opened. The file will be
    535            opened with mode 'rb'.
    536 
    537         @param encoding If you set the optional encoding argument, then the
    538            data will be decoded on the fly.
    539 
    540         """
    541 
    542         self.fileName = fileName
    543 
    544         fp = codecs.open(fileName, 'rb', encoding)
    545         try:
    546             data = fp.read()
    547         finally:
    548             fp.close()
    549 
    550         ANTLRStringStream.__init__(self, data)
    551 
    552 
    553     def getSourceName(self):
    554         """Deprecated, access o.fileName directly."""
    555 
    556         return self.fileName
    557 
    558 
    559 class ANTLRInputStream(ANTLRStringStream):
    560     """
    561     @brief CharStream that reads data from a file-like object.
    562 
    563     This is a char buffer stream that is loaded from a file like object
    564     all at once when you construct the object.
    565 
    566     All input is consumed from the file, but it is not closed.
    567     """
    568 
    569     def __init__(self, file, encoding=None):
    570         """
    571         @param file A file-like object holding your input. Only the read()
    572            method must be implemented.
    573 
    574         @param encoding If you set the optional encoding argument, then the
    575            data will be decoded on the fly.
    576 
    577         """
    578 
    579         if encoding is not None:
    580             # wrap input in a decoding reader
    581             reader = codecs.lookup(encoding)[2]
    582             file = reader(file)
    583 
    584         data = file.read()
    585 
    586         ANTLRStringStream.__init__(self, data)
    587 
    588 
# I guess the ANTLR prefix exists only to avoid a name clash with some Java
# mumbojumbo. A plain "StringStream" looks better to me, which should be
# the preferred name in Python.
# These aliases are the preferred spellings; the ANTLR* names are kept for
# compatibility with code generated by the ANTLR tool.
StringStream = ANTLRStringStream
FileStream = ANTLRFileStream
InputStream = ANTLRInputStream
    595 
    596 
    597 ############################################################################
    598 #
    599 # Token streams
    600 #   TokenStream
    601 #   +- CommonTokenStream
    602 #   \- TokenRewriteStream
    603 #
    604 ############################################################################
    605 
    606 
    607 class CommonTokenStream(TokenStream):
    608     """
    609     @brief The most common stream of tokens
    610 
    611     The most common stream of tokens is one where every token is buffered up
    612     and tokens are prefiltered for a certain channel (the parser will only
    613     see these tokens and cannot change the filter channel number during the
    614     parse).
    615     """
    616 
    617     def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
    618         """
    619         @param tokenSource A TokenSource instance (usually a Lexer) to pull
    620             the tokens from.
    621 
    622         @param channel Skip tokens on any channel but this one; this is how we
    623             skip whitespace...
    624 
    625         """
    626 
    627         TokenStream.__init__(self)
    628 
    629         self.tokenSource = tokenSource
    630 
    631 	# Record every single token pulled from the source so we can reproduce
    632         # chunks of it later.
    633         self.tokens = []
    634 
    635 	# Map<tokentype, channel> to override some Tokens' channel numbers
    636         self.channelOverrideMap = {}
    637 
    638 	# Set<tokentype>; discard any tokens with this type
    639         self.discardSet = set()
    640 
    641 	# Skip tokens on any channel but this one; this is how we skip
    642         # whitespace...
    643         self.channel = channel
    644 
    645 	# By default, track all incoming tokens
    646         self.discardOffChannelTokens = False
    647 
    648 	# The index into the tokens list of the current token (next token
    649         # to consume).  p==-1 indicates that the tokens list is empty
    650         self.p = -1
    651 
    652         # Remember last marked position
    653         self.lastMarker = None
    654 
    655         # how deep have we gone?
    656         self._range = -1
    657 
    658 
    659     def makeEOFToken(self):
    660         return self.tokenSource.makeEOFToken()
    661 
    662 
    663     def setTokenSource(self, tokenSource):
    664         """Reset this token stream by setting its token source."""
    665 
    666         self.tokenSource = tokenSource
    667         self.tokens = []
    668         self.p = -1
    669         self.channel = DEFAULT_CHANNEL
    670 
    671 
    672     def reset(self):
    673         self.p = 0
    674         self.lastMarker = None
    675 
    676 
    677     def fillBuffer(self):
    678         """
    679         Load all tokens from the token source and put in tokens.
    680 	This is done upon first LT request because you might want to
    681         set some token type / channel overrides before filling buffer.
    682         """
    683 
    684 
    685         index = 0
    686         t = self.tokenSource.nextToken()
    687         while t is not None and t.type != EOF:
    688             discard = False
    689 
    690             if self.discardSet is not None and t.type in self.discardSet:
    691                 discard = True
    692 
    693             elif self.discardOffChannelTokens and t.channel != self.channel:
    694                 discard = True
    695 
    696             # is there a channel override for token type?
    697             try:
    698                 overrideChannel = self.channelOverrideMap[t.type]
    699 
    700             except KeyError:
    701                 # no override for this type
    702                 pass
    703 
    704             else:
    705                 if overrideChannel == self.channel:
    706                     t.channel = overrideChannel
    707                 else:
    708                     discard = True
    709 
    710             if not discard:
    711                 t.index = index
    712                 self.tokens.append(t)
    713                 index += 1
    714 
    715             t = self.tokenSource.nextToken()
    716 
    717         # leave p pointing at first token on channel
    718         self.p = 0
    719         self.p = self.skipOffTokenChannels(self.p)
    720 
    721 
    722     def consume(self):
    723         """
    724         Move the input pointer to the next incoming token.  The stream
    725         must become active with LT(1) available.  consume() simply
    726         moves the input pointer so that LT(1) points at the next
    727         input symbol. Consume at least one token.
    728 
    729         Walk past any token not on the channel the parser is listening to.
    730         """
    731 
    732         if self.p < len(self.tokens):
    733             self.p += 1
    734 
    735             self.p = self.skipOffTokenChannels(self.p) # leave p on valid token
    736 
    737 
    738     def skipOffTokenChannels(self, i):
    739         """
    740         Given a starting index, return the index of the first on-channel
    741         token.
    742         """
    743 
    744         try:
    745             while self.tokens[i].channel != self.channel:
    746                 i += 1
    747         except IndexError:
    748             # hit the end of token stream
    749             pass
    750 
    751         return i
    752 
    753 
    754     def skipOffTokenChannelsReverse(self, i):
    755         while i >= 0 and self.tokens[i].channel != self.channel:
    756             i -= 1
    757 
    758         return i
    759 
    760 
    761     def setTokenTypeChannel(self, ttype, channel):
    762         """
    763         A simple filter mechanism whereby you can tell this token stream
    764         to force all tokens of type ttype to be on channel.  For example,
    765         when interpreting, we cannot exec actions so we need to tell
    766         the stream to force all WS and NEWLINE to be a different, ignored
    767         channel.
    768 	"""
    769 
    770         self.channelOverrideMap[ttype] = channel
    771 
    772 
    773     def discardTokenType(self, ttype):
    774         self.discardSet.add(ttype)
    775 
    776 
    777     def getTokens(self, start=None, stop=None, types=None):
    778         """
    779         Given a start and stop index, return a list of all tokens in
    780         the token type set.  Return None if no tokens were found.  This
    781         method looks at both on and off channel tokens.
    782         """
    783 
    784         if self.p == -1:
    785             self.fillBuffer()
    786 
    787         if stop is None or stop >= len(self.tokens):
    788             stop = len(self.tokens) - 1
    789 
    790         if start is None or stop < 0:
    791             start = 0
    792 
    793         if start > stop:
    794             return None
    795 
    796         if isinstance(types, (int, long)):
    797             # called with a single type, wrap into set
    798             types = set([types])
    799 
    800         filteredTokens = [
    801             token for token in self.tokens[start:stop]
    802             if types is None or token.type in types
    803             ]
    804 
    805         if len(filteredTokens) == 0:
    806             return None
    807 
    808         return filteredTokens
    809 
    810 
    811     def LT(self, k):
    812         """
    813         Get the ith token from the current position 1..n where k=1 is the
    814         first symbol of lookahead.
    815         """
    816 
    817         if self.p == -1:
    818             self.fillBuffer()
    819 
    820         if k == 0:
    821             return None
    822 
    823         if k < 0:
    824             return self.LB(-k)
    825 
    826         i = self.p
    827         n = 1
    828         # find k good tokens
    829         while n < k:
    830             # skip off-channel tokens
    831             i = self.skipOffTokenChannels(i+1) # leave p on valid token
    832             n += 1
    833 
    834         if i > self._range:
    835             self._range = i
    836 
    837         try:
    838             return self.tokens[i]
    839         except IndexError:
    840             return self.makeEOFToken()
    841 
    842 
    843     def LB(self, k):
    844         """Look backwards k tokens on-channel tokens"""
    845 
    846         if self.p == -1:
    847             self.fillBuffer()
    848 
    849         if k == 0:
    850             return None
    851 
    852         if self.p - k < 0:
    853             return None
    854 
    855         i = self.p
    856         n = 1
    857         # find k good tokens looking backwards
    858         while n <= k:
    859             # skip off-channel tokens
    860             i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token
    861             n += 1
    862 
    863         if i < 0:
    864             return None
    865 
    866         return self.tokens[i]
    867 
    868 
    869     def get(self, i):
    870         """
    871         Return absolute token i; ignore which channel the tokens are on;
    872         that is, count all tokens not just on-channel tokens.
    873         """
    874 
    875         return self.tokens[i]
    876 
    877 
    def slice(self, start, stop):
        """Return the tokens in [start, stop] inclusive (all channels),
        or None when either bound is negative."""
        if self.p == -1:
            self.fillBuffer()

        if start < 0 or stop < 0:
            return None

        return self.tokens[start:stop + 1]
    886 
    887 
    def LA(self, i):
        """Return the token type of the i-th lookahead token."""
        return self.LT(i).type
    890 
    891 
    def mark(self):
        """Record the current position as a marker and return it."""
        self.lastMarker = self.index()
        return self.lastMarker
    895 
    896 
    def release(self, marker=None):
        """Release the given marker; tokens are fully buffered in
        memory, so there is nothing to free."""
        pass
    900 
    901 
    def size(self):
        """Return the total number of buffered tokens."""
        return len(self.tokens)
    904 
    905 
    def range(self):
        """Return the highest token index reached so far (tracked by LT)."""
        return self._range
    908 
    909 
    def index(self):
        """Return the index of the current input position."""
        return self.p
    912 
    913 
    def rewind(self, marker=None):
        """Seek back to the given marker (default: the last one recorded)."""
        self.seek(self.lastMarker if marker is None else marker)
    919 
    920 
    def seek(self, index):
        """Move the current position to the given absolute token index."""
        self.p = index
    923 
    924 
    def getTokenSource(self):
        """Return the token source feeding this stream."""
        return self.tokenSource
    927 
    928 
    def getSourceName(self):
        """Delegate to the underlying token source's getSourceName()."""
        return self.tokenSource.getSourceName()
    931 
    932 
    def toString(self, start=None, stop=None):
        """Concatenate the text of tokens start..stop (inclusive).

        Bounds may be token indexes or token objects (their .index
        attribute is used); stop is clamped to the end of the buffer.
        """
        if self.p == -1:
            self.fillBuffer()

        if start is None:
            start = 0
        elif not isinstance(start, int):
            start = start.index

        if stop is None:
            stop = len(self.tokens) - 1
        elif not isinstance(stop, int):
            stop = stop.index

        # never read past the last buffered token
        stop = min(stop, len(self.tokens) - 1)

        return ''.join(t.text for t in self.tokens[start:stop + 1])
    951 
    952 
    953 class RewriteOperation(object):
    954     """@brief Internal helper class."""
    955 
    956     def __init__(self, stream, index, text):
    957         self.stream = stream
    958 
    959         # What index into rewrites List are we?
    960         self.instructionIndex = None
    961 
    962         # Token buffer index.
    963         self.index = index
    964         self.text = text
    965 
    966     def execute(self, buf):
    967         """Execute the rewrite operation by possibly adding to the buffer.
    968         Return the index of the next token to operate on.
    969         """
    970 
    971         return self.index
    972 
    973     def toString(self):
    974         opName = self.__class__.__name__
    975         return '<%s@%d:"%s">' % (
    976             opName, self.index, self.text)
    977 
    978     __str__ = toString
    979     __repr__ = toString
    980 
    981 
    982 class InsertBeforeOp(RewriteOperation):
    983     """@brief Internal helper class."""
    984 
    985     def execute(self, buf):
    986         buf.write(self.text)
    987         if self.stream.tokens[self.index].type != EOF:
    988             buf.write(self.stream.tokens[self.index].text)
    989         return self.index + 1
    990 
    991 
    992 class ReplaceOp(RewriteOperation):
    993     """
    994     @brief Internal helper class.
    995 
    996     I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
    997     instructions.
    998     """
    999 
   1000     def __init__(self, stream, first, last, text):
   1001         RewriteOperation.__init__(self, stream, first, text)
   1002         self.lastIndex = last
   1003 
   1004 
   1005     def execute(self, buf):
   1006         if self.text is not None:
   1007             buf.write(self.text)
   1008 
   1009         return self.lastIndex + 1
   1010 
   1011 
   1012     def toString(self):
   1013         if self.text is None:
   1014             return '<DeleteOp@%d..%d>' % (self.index, self.lastIndex)
   1015 
   1016         return '<ReplaceOp@%d..%d:"%s">' % (
   1017             self.index, self.lastIndex, self.text)
   1018 
   1019     __str__ = toString
   1020     __repr__ = toString
   1021 
   1022 
   1023 class TokenRewriteStream(CommonTokenStream):
   1024     """@brief CommonTokenStream that can be modified.
   1025 
   1026     Useful for dumping out the input stream after doing some
   1027     augmentation or other manipulations.
   1028 
   1029     You can insert stuff, replace, and delete chunks.  Note that the
   1030     operations are done lazily--only if you convert the buffer to a
   1031     String.  This is very efficient because you are not moving data around
   1032     all the time.  As the buffer of tokens is converted to strings, the
   1033     toString() method(s) check to see if there is an operation at the
   1034     current index.  If so, the operation is done and then normal String
   1035     rendering continues on the buffer.  This is like having multiple Turing
   1036     machine instruction streams (programs) operating on a single input tape. :)
   1037 
   1038     Since the operations are done lazily at toString-time, operations do not
   1039     screw up the token index values.  That is, an insert operation at token
   1040     index i does not change the index values for tokens i+1..n-1.
   1041 
   1042     Because operations never actually alter the buffer, you may always get
   1043     the original token stream back without undoing anything.  Since
   1044     the instructions are queued up, you can easily simulate transactions and
   1045     roll back any changes if there is an error just by removing instructions.
   1046     For example,
   1047 
   1048      CharStream input = new ANTLRFileStream("input");
   1049      TLexer lex = new TLexer(input);
   1050      TokenRewriteStream tokens = new TokenRewriteStream(lex);
   1051      T parser = new T(tokens);
   1052      parser.startRule();
   1053 
   1054      Then in the rules, you can execute
   1055         Token t,u;
   1056         ...
        input.insertAfter(t, "text to put after t");
        input.insertAfter(u, "text after u");
   1059         System.out.println(tokens.toString());
   1060 
   1061     Actually, you have to cast the 'input' to a TokenRewriteStream. :(
   1062 
   1063     You can also have multiple "instruction streams" and get multiple
   1064     rewrites from a single pass over the input.  Just name the instruction
   1065     streams and use that name again when printing the buffer.  This could be
   1066     useful for generating a C file and also its header file--all from the
   1067     same buffer:
   1068 
        tokens.insertAfter("pass1", t, "text to put after t");
        tokens.insertAfter("pass2", u, "text after u");
   1071         System.out.println(tokens.toString("pass1"));
   1072         System.out.println(tokens.toString("pass2"));
   1073 
   1074     If you don't use named rewrite streams, a "default" stream is used as
   1075     the first example shows.
   1076     """
   1077 
   1078     DEFAULT_PROGRAM_NAME = "default"
   1079     MIN_TOKEN_INDEX = 0
   1080 
   1081     def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
   1082         CommonTokenStream.__init__(self, tokenSource, channel)
   1083 
   1084         # You may have multiple, named streams of rewrite operations.
   1085         # I'm calling these things "programs."
   1086         #  Maps String (name) -> rewrite (List)
   1087         self.programs = {}
   1088         self.programs[self.DEFAULT_PROGRAM_NAME] = []
   1089 
   1090  	# Map String (program name) -> Integer index
   1091         self.lastRewriteTokenIndexes = {}
   1092 
   1093 
   1094     def rollback(self, *args):
   1095         """
   1096         Rollback the instruction stream for a program so that
   1097         the indicated instruction (via instructionIndex) is no
   1098         longer in the stream.  UNTESTED!
   1099         """
   1100 
   1101         if len(args) == 2:
   1102             programName = args[0]
   1103             instructionIndex = args[1]
   1104         elif len(args) == 1:
   1105             programName = self.DEFAULT_PROGRAM_NAME
   1106             instructionIndex = args[0]
   1107         else:
   1108             raise TypeError("Invalid arguments")
   1109 
   1110         p = self.programs.get(programName, None)
   1111         if p is not None:
   1112             self.programs[programName] = (
   1113                 p[self.MIN_TOKEN_INDEX:instructionIndex])
   1114 
   1115 
   1116     def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME):
   1117         """Reset the program so that no instructions exist"""
   1118 
   1119         self.rollback(programName, self.MIN_TOKEN_INDEX)
   1120 
   1121 
   1122     def insertAfter(self, *args):
   1123         if len(args) == 2:
   1124             programName = self.DEFAULT_PROGRAM_NAME
   1125             index = args[0]
   1126             text = args[1]
   1127 
   1128         elif len(args) == 3:
   1129             programName = args[0]
   1130             index = args[1]
   1131             text = args[2]
   1132 
   1133         else:
   1134             raise TypeError("Invalid arguments")
   1135 
   1136         if isinstance(index, Token):
   1137             # index is a Token, grap the stream index from it
   1138             index = index.index
   1139 
   1140         # to insert after, just insert before next index (even if past end)
   1141         self.insertBefore(programName, index+1, text)
   1142 
   1143 
   1144     def insertBefore(self, *args):
   1145         if len(args) == 2:
   1146             programName = self.DEFAULT_PROGRAM_NAME
   1147             index = args[0]
   1148             text = args[1]
   1149 
   1150         elif len(args) == 3:
   1151             programName = args[0]
   1152             index = args[1]
   1153             text = args[2]
   1154 
   1155         else:
   1156             raise TypeError("Invalid arguments")
   1157 
   1158         if isinstance(index, Token):
   1159             # index is a Token, grap the stream index from it
   1160             index = index.index
   1161 
   1162         op = InsertBeforeOp(self, index, text)
   1163         rewrites = self.getProgram(programName)
   1164         op.instructionIndex = len(rewrites)
   1165         rewrites.append(op)
   1166 
   1167 
   1168     def replace(self, *args):
   1169         if len(args) == 2:
   1170             programName = self.DEFAULT_PROGRAM_NAME
   1171             first = args[0]
   1172             last = args[0]
   1173             text = args[1]
   1174 
   1175         elif len(args) == 3:
   1176             programName = self.DEFAULT_PROGRAM_NAME
   1177             first = args[0]
   1178             last = args[1]
   1179             text = args[2]
   1180 
   1181         elif len(args) == 4:
   1182             programName = args[0]
   1183             first = args[1]
   1184             last = args[2]
   1185             text = args[3]
   1186 
   1187         else:
   1188             raise TypeError("Invalid arguments")
   1189 
   1190         if isinstance(first, Token):
   1191             # first is a Token, grap the stream index from it
   1192             first = first.index
   1193 
   1194         if isinstance(last, Token):
   1195             # last is a Token, grap the stream index from it
   1196             last = last.index
   1197 
   1198         if first > last or first < 0 or last < 0 or last >= len(self.tokens):
   1199             raise ValueError(
   1200                 "replace: range invalid: %d..%d (size=%d)"
   1201                 % (first, last, len(self.tokens)))
   1202 
   1203         op = ReplaceOp(self, first, last, text)
   1204         rewrites = self.getProgram(programName)
   1205         op.instructionIndex = len(rewrites)
   1206         rewrites.append(op)
   1207 
   1208 
   1209     def delete(self, *args):
   1210         self.replace(*(list(args) + [None]))
   1211 
   1212 
   1213     def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME):
   1214         return self.lastRewriteTokenIndexes.get(programName, -1)
   1215 
   1216 
   1217     def setLastRewriteTokenIndex(self, programName, i):
   1218         self.lastRewriteTokenIndexes[programName] = i
   1219 
   1220 
   1221     def getProgram(self, name):
   1222         p = self.programs.get(name, None)
   1223         if p is  None:
   1224             p = self.initializeProgram(name)
   1225 
   1226         return p
   1227 
   1228 
   1229     def initializeProgram(self, name):
   1230         p = []
   1231         self.programs[name] = p
   1232         return p
   1233 
   1234 
   1235     def toOriginalString(self, start=None, end=None):
   1236         if self.p == -1:
   1237             self.fillBuffer()
   1238 
   1239         if start is None:
   1240             start = self.MIN_TOKEN_INDEX
   1241         if end is None:
   1242             end = self.size() - 1
   1243 
   1244         buf = StringIO()
   1245         i = start
   1246         while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
   1247             if self.get(i).type != EOF:
   1248                 buf.write(self.get(i).text)
   1249             i += 1
   1250 
   1251         return buf.getvalue()
   1252 
   1253 
   1254     def toString(self, *args):
   1255         if self.p == -1:
   1256             self.fillBuffer()
   1257 
   1258         if len(args) == 0:
   1259             programName = self.DEFAULT_PROGRAM_NAME
   1260             start = self.MIN_TOKEN_INDEX
   1261             end = self.size() - 1
   1262 
   1263         elif len(args) == 1:
   1264             programName = args[0]
   1265             start = self.MIN_TOKEN_INDEX
   1266             end = self.size() - 1
   1267 
   1268         elif len(args) == 2:
   1269             programName = self.DEFAULT_PROGRAM_NAME
   1270             start = args[0]
   1271             end = args[1]
   1272 
   1273         if start is None:
   1274             start = self.MIN_TOKEN_INDEX
   1275         elif not isinstance(start, int):
   1276             start = start.index
   1277 
   1278         if end is None:
   1279             end = len(self.tokens) - 1
   1280         elif not isinstance(end, int):
   1281             end = end.index
   1282 
   1283         # ensure start/end are in range
   1284         if end >= len(self.tokens):
   1285             end = len(self.tokens) - 1
   1286 
   1287         if start < 0:
   1288             start = 0
   1289 
   1290         rewrites = self.programs.get(programName)
   1291         if rewrites is None or len(rewrites) == 0:
   1292             # no instructions to execute
   1293             return self.toOriginalString(start, end)
   1294 
   1295         buf = StringIO()
   1296 
   1297         # First, optimize instruction stream
   1298         indexToOp = self.reduceToSingleOperationPerIndex(rewrites)
   1299 
   1300         # Walk buffer, executing instructions and emitting tokens
   1301         i = start
   1302         while i <= end and i < len(self.tokens):
   1303             op = indexToOp.get(i)
   1304             # remove so any left have index size-1
   1305             try:
   1306                 del indexToOp[i]
   1307             except KeyError:
   1308                 pass
   1309 
   1310             t = self.tokens[i]
   1311             if op is None:
   1312                 # no operation at that index, just dump token
   1313                 if t.type != EOF:
   1314                     buf.write(t.text)
   1315                 i += 1 # move to next token
   1316 
   1317             else:
   1318                 i = op.execute(buf) # execute operation and skip
   1319 
   1320         # include stuff after end if it's last index in buffer
   1321         # So, if they did an insertAfter(lastValidIndex, "foo"), include
   1322         # foo if end==lastValidIndex.
   1323         if end == len(self.tokens) - 1:
   1324             # Scan any remaining operations after last token
   1325             # should be included (they will be inserts).
   1326             for i in sorted(indexToOp.keys()):
   1327                 op = indexToOp[i]
   1328                 if op.index >= len(self.tokens)-1:
   1329                     buf.write(op.text)
   1330 
   1331         return buf.getvalue()
   1332 
   1333     __str__ = toString
   1334 
   1335 
    def reduceToSingleOperationPerIndex(self, rewrites):
        """
        We need to combine operations and report invalid operations (like
        overlapping replaces that are not completely nested).  Inserts to
        same index need to be combined etc...  Here are the cases:

        I.i.u I.j.v                           leave alone, nonoverlapping
        I.i.u I.i.v                           combine: Iivu

        R.i-j.u R.x-y.v | i-j in x-y          delete first R
        R.i-j.u R.i-j.v                       delete first R
        R.i-j.u R.x-y.v | x-y in i-j          ERROR
        R.i-j.u R.x-y.v | boundaries overlap  ERROR

        Delete special case of replace (text==null):
        D.i-j.u D.x-y.v | boundaries overlap  combine to
                                              max(min)..max(right)

        I.i.u R.x-y.v   | i in (x+1)-y        delete I (since insert
                                              before, we're not deleting i)
        I.i.u R.x-y.v   | i not in (x+1)-y    leave alone, nonoverlapping

        R.x-y.v I.i.u   | i in x-y            ERROR
        R.x-y.v I.x.u                         R.x-y.uv (combine, delete I)
        R.x-y.v I.i.u   | i not in x-y        leave alone, nonoverlapping

        I.i.u = insert u before op @ index i
        R.x-y.u = replace x-y indexed tokens with u

        First we need to examine replaces.  For any replace op:

          1. wipe out any insertions before op within that range.
          2. Drop any replace op before that is contained completely within
             that range.
          3. Throw exception upon boundary overlap with any previous replace.

        Then we can deal with inserts:

          1. for any inserts to same index, combine even if not adjacent.
          2. for any prior replace with same left boundary, combine this
             insert with replace and delete this replace.
          3. throw exception if index in same range as previous replace

        Don't actually delete; make op null in list. Easier to walk list.
        Later we can throw as we add to index -> op map.

        Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
        inserted stuff would be before the replace range.  But, if you
        add tokens in front of a method body '{' and then delete the method
        body, I think the stuff before the '{' you added should disappear too.

        Return a map from token index to operation.
        """

        # WALK REPLACES
        for i, rop in enumerate(rewrites):
            if rop is None:
                # already deleted by an earlier pass; skip
                continue

            if not isinstance(rop, ReplaceOp):
                continue

            # Wipe prior inserts within range
            for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
                if iop.index == rop.index:
                    # E.g., insert before 2, delete 2..2; update replace
                    # text to include insert before, kill insert
                    rewrites[iop.instructionIndex] = None
                    rop.text = self.catOpText(iop.text, rop.text)

                elif iop.index > rop.index and iop.index <= rop.lastIndex:
                    # insert falls strictly inside the replaced range:
                    # delete insert as it's a no-op.
                    rewrites[j] = None

            # Drop any prior replaces contained within
            for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i):
                if (prevRop.index >= rop.index
                    and prevRop.lastIndex <= rop.lastIndex):
                    # fully nested inside this replace:
                    # delete replace as it's a no-op.
                    rewrites[j] = None
                    continue

                # throw exception unless disjoint or identical
                disjoint = (prevRop.lastIndex < rop.index
                            or prevRop.index > rop.lastIndex)
                same = (prevRop.index == rop.index
                        and prevRop.lastIndex == rop.lastIndex)

                # Delete special case of replace (text==null):
                # D.i-j.u D.x-y.v | boundaries overlap, combine to
                # max(min)..max(right)
                if prevRop.text is None and rop.text is None and not disjoint:
                    # kill first delete, widen this one to cover both ranges
                    rewrites[prevRop.instructionIndex] = None

                    rop.index = min(prevRop.index, rop.index)
                    rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex)

                elif not disjoint and not same:
                    raise ValueError(
                        "replace op boundaries of %s overlap with previous %s"
                        % (rop, prevRop))

        # WALK INSERTS
        for i, iop in enumerate(rewrites):
            if iop is None:
                # already deleted by an earlier pass; skip
                continue

            if not isinstance(iop, InsertBeforeOp):
                continue

            # combine current insert with prior if any at same index
            for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
                if prevIop.index == iop.index: # combine objects
                    # convert to strings...we're in process of toString'ing
                    # whole token buffer so no lazy eval issue with any
                    # templates
                    iop.text = self.catOpText(iop.text, prevIop.text)
                    # delete redundant prior insert
                    rewrites[j] = None

            # look for replaces where iop.index is in range; error
            for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i):
                if iop.index == rop.index:
                    # insert at the replace's left boundary: fold the
                    # inserted text into the replace instead
                    rop.text = self.catOpText(iop.text, rop.text)
                    # delete current insert
                    rewrites[i] = None
                    continue

                if iop.index >= rop.index and iop.index <= rop.lastIndex:
                    raise ValueError(
                        "insert op %s within boundaries of previous %s"
                        % (iop, rop))

        # after reduction there must be at most one live op per token index
        m = {}
        for i, op in enumerate(rewrites):
            if op is None:
                # ignore deleted ops
                continue

            assert op.index not in m, "should only be one op per index"
            m[op.index] = op

        return m
   1482 
   1483 
   1484     def catOpText(self, a, b):
   1485         x = ""
   1486         y = ""
   1487         if a is not None:
   1488             x = a
   1489         if b is not None:
   1490             y = b
   1491         return x + y
   1492 
   1493 
   1494     def getKindOfOps(self, rewrites, kind, before=None):
   1495         """Get all operations before an index of a particular kind."""
   1496 
   1497         if before is None:
   1498             before = len(rewrites)
   1499         elif before > len(rewrites):
   1500             before = len(rewrites)
   1501 
   1502         for i, op in enumerate(rewrites[:before]):
   1503             if op is None:
   1504                 # ignore deleted
   1505                 continue
   1506             if op.__class__ == kind:
   1507                 yield i, op
   1508 
   1509 
   1510     def toDebugString(self, start=None, end=None):
   1511         if start is None:
   1512             start = self.MIN_TOKEN_INDEX
   1513         if end is None:
   1514             end = self.size() - 1
   1515 
   1516         buf = StringIO()
   1517         i = start
   1518         while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
   1519             buf.write(self.get(i))
   1520             i += 1
   1521 
   1522         return buf.getvalue()
   1523