# (code-browser navigation residue: Home | History | Annotate | Download | only in antlr3)
      1 """ANTLR3 runtime package"""
      2 
      3 # begin[licence]
      4 #
      5 # [The "BSD licence"]
      6 # Copyright (c) 2005-2008 Terence Parr
      7 # All rights reserved.
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 3. The name of the author may not be used to endorse or promote products
     18 #    derived from this software without specific prior written permission.
     19 #
     20 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     21 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     22 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     23 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     24 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     25 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     29 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 # end[licence]
     32 
     33 from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE
     34 
     35 ############################################################################
     36 #
     37 # basic token interface
     38 #
     39 ############################################################################
     40 
     41 class Token(object):
     42     """@brief Abstract token baseclass."""
     43 
     44     def getText(self):
     45         """@brief Get the text of the token.
     46 
     47         Using setter/getter methods is deprecated. Use o.text instead.
     48         """
     49         raise NotImplementedError
     50 
     51     def setText(self, text):
     52         """@brief Set the text of the token.
     53 
     54         Using setter/getter methods is deprecated. Use o.text instead.
     55         """
     56         raise NotImplementedError
     57 
     58 
     59     def getType(self):
     60         """@brief Get the type of the token.
     61 
     62         Using setter/getter methods is deprecated. Use o.type instead."""
     63 
     64         raise NotImplementedError
     65 
     66     def setType(self, ttype):
     67         """@brief Get the type of the token.
     68 
     69         Using setter/getter methods is deprecated. Use o.type instead."""
     70 
     71         raise NotImplementedError
     72 
     73 
     74     def getLine(self):
     75         """@brief Get the line number on which this token was matched
     76 
     77         Lines are numbered 1..n
     78 
     79         Using setter/getter methods is deprecated. Use o.line instead."""
     80 
     81         raise NotImplementedError
     82 
     83     def setLine(self, line):
     84         """@brief Set the line number on which this token was matched
     85 
     86         Using setter/getter methods is deprecated. Use o.line instead."""
     87 
     88         raise NotImplementedError
     89 
     90 
     91     def getCharPositionInLine(self):
     92         """@brief Get the column of the tokens first character,
     93 
     94         Columns are numbered 0..n-1
     95 
     96         Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""
     97 
     98         raise NotImplementedError
     99 
    100     def setCharPositionInLine(self, pos):
    101         """@brief Set the column of the tokens first character,
    102 
    103         Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""
    104 
    105         raise NotImplementedError
    106 
    107 
    108     def getChannel(self):
    109         """@brief Get the channel of the token
    110 
    111         Using setter/getter methods is deprecated. Use o.channel instead."""
    112 
    113         raise NotImplementedError
    114 
    115     def setChannel(self, channel):
    116         """@brief Set the channel of the token
    117 
    118         Using setter/getter methods is deprecated. Use o.channel instead."""
    119 
    120         raise NotImplementedError
    121 
    122 
    123     def getTokenIndex(self):
    124         """@brief Get the index in the input stream.
    125 
    126         An index from 0..n-1 of the token object in the input stream.
    127         This must be valid in order to use the ANTLRWorks debugger.
    128 
    129         Using setter/getter methods is deprecated. Use o.index instead."""
    130 
    131         raise NotImplementedError
    132 
    133     def setTokenIndex(self, index):
    134         """@brief Set the index in the input stream.
    135 
    136         Using setter/getter methods is deprecated. Use o.index instead."""
    137 
    138         raise NotImplementedError
    139 
    140 
    141     def getInputStream(self):
    142         """@brief From what character stream was this token created.
    143 
    144         You don't have to implement but it's nice to know where a Token
    145         comes from if you have include files etc... on the input."""
    146 
    147         raise NotImplementedError
    148 
    149     def setInputStream(self, input):
    150         """@brief From what character stream was this token created.
    151 
    152         You don't have to implement but it's nice to know where a Token
    153         comes from if you have include files etc... on the input."""
    154 
    155         raise NotImplementedError
    156 
    157 
    158 ############################################################################
    159 #
    160 # token implementations
    161 #
    162 # Token
    163 # +- CommonToken
    164 # \- ClassicToken
    165 #
    166 ############################################################################
    167 
    168 class CommonToken(Token):
    169     """@brief Basic token implementation.
    170 
    171     This implementation does not copy the text from the input stream upon
    172     creation, but keeps start/stop pointers into the stream to avoid
    173     unnecessary copy operations.
    174 
    175     """
    176 
    177     def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
    178                  input=None, start=None, stop=None, oldToken=None):
    179         Token.__init__(self)
    180 
    181         if oldToken is not None:
    182             self.type = oldToken.type
    183             self.line = oldToken.line
    184             self.charPositionInLine = oldToken.charPositionInLine
    185             self.channel = oldToken.channel
    186             self.index = oldToken.index
    187             self._text = oldToken._text
    188             self.input = oldToken.input
    189             if isinstance(oldToken, CommonToken):
    190                 self.start = oldToken.start
    191                 self.stop = oldToken.stop
    192 
    193         else:
    194             self.type = type
    195             self.input = input
    196             self.charPositionInLine = -1 # set to invalid position
    197             self.line = 0
    198             self.channel = channel
    199 
    200 	    #What token number is this from 0..n-1 tokens; < 0 implies invalid index
    201             self.index = -1
    202 
    203             # We need to be able to change the text once in a while.  If
    204             # this is non-null, then getText should return this.  Note that
    205             # start/stop are not affected by changing this.
    206             self._text = text
    207 
    208             # The char position into the input buffer where this token starts
    209             self.start = start
    210 
    211             # The char position into the input buffer where this token stops
    212             # This is the index of the last char, *not* the index after it!
    213             self.stop = stop
    214 
    215 
    216     def getText(self):
    217         if self._text is not None:
    218             return self._text
    219 
    220         if self.input is None:
    221             return None
    222 
    223         if self.start < self.input.size() and self.stop < self.input.size():
    224           return self.input.substring(self.start, self.stop)
    225 
    226         return '<EOF>'
    227 
    228 
    229     def setText(self, text):
    230         """
    231         Override the text for this token.  getText() will return this text
    232         rather than pulling from the buffer.  Note that this does not mean
    233         that start/stop indexes are not valid.  It means that that input
    234         was converted to a new string in the token object.
    235 	"""
    236         self._text = text
    237 
    238     text = property(getText, setText)
    239 
    240 
    241     def getType(self):
    242         return self.type
    243 
    244     def setType(self, ttype):
    245         self.type = ttype
    246 
    247     def getTypeName(self):
    248         return str(self.type)
    249 
    250     typeName = property(lambda s: s.getTypeName())
    251 
    252     def getLine(self):
    253         return self.line
    254 
    255     def setLine(self, line):
    256         self.line = line
    257 
    258 
    259     def getCharPositionInLine(self):
    260         return self.charPositionInLine
    261 
    262     def setCharPositionInLine(self, pos):
    263         self.charPositionInLine = pos
    264 
    265 
    266     def getChannel(self):
    267         return self.channel
    268 
    269     def setChannel(self, channel):
    270         self.channel = channel
    271 
    272 
    273     def getTokenIndex(self):
    274         return self.index
    275 
    276     def setTokenIndex(self, index):
    277         self.index = index
    278 
    279 
    280     def getInputStream(self):
    281         return self.input
    282 
    283     def setInputStream(self, input):
    284         self.input = input
    285 
    286 
    287     def __str__(self):
    288         if self.type == EOF:
    289             return "<EOF>"
    290 
    291         channelStr = ""
    292         if self.channel > 0:
    293             channelStr = ",channel=" + str(self.channel)
    294 
    295         txt = self.text
    296         if txt is not None:
    297             txt = txt.replace("\n","\\\\n")
    298             txt = txt.replace("\r","\\\\r")
    299             txt = txt.replace("\t","\\\\t")
    300         else:
    301             txt = "<no text>"
    302 
    303         return "[@%d,%d:%d=%r,<%s>%s,%d:%d]" % (
    304             self.index,
    305             self.start, self.stop,
    306             txt,
    307             self.typeName, channelStr,
    308             self.line, self.charPositionInLine
    309             )
    310 
    311 
    312 class ClassicToken(Token):
    313     """@brief Alternative token implementation.
    314 
    315     A Token object like we'd use in ANTLR 2.x; has an actual string created
    316     and associated with this object.  These objects are needed for imaginary
    317     tree nodes that have payload objects.  We need to create a Token object
    318     that has a string; the tree node will point at this token.  CommonToken
    319     has indexes into a char stream and hence cannot be used to introduce
    320     new strings.
    321     """
    322 
    323     def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
    324                  oldToken=None
    325                  ):
    326         Token.__init__(self)
    327 
    328         if oldToken is not None:
    329             self.text = oldToken.text
    330             self.type = oldToken.type
    331             self.line = oldToken.line
    332             self.charPositionInLine = oldToken.charPositionInLine
    333             self.channel = oldToken.channel
    334 
    335         self.text = text
    336         self.type = type
    337         self.line = None
    338         self.charPositionInLine = None
    339         self.channel = channel
    340         self.index = None
    341 
    342 
    343     def getText(self):
    344         return self.text
    345 
    346     def setText(self, text):
    347         self.text = text
    348 
    349 
    350     def getType(self):
    351         return self.type
    352 
    353     def setType(self, ttype):
    354         self.type = ttype
    355 
    356 
    357     def getLine(self):
    358         return self.line
    359 
    360     def setLine(self, line):
    361         self.line = line
    362 
    363 
    364     def getCharPositionInLine(self):
    365         return self.charPositionInLine
    366 
    367     def setCharPositionInLine(self, pos):
    368         self.charPositionInLine = pos
    369 
    370 
    371     def getChannel(self):
    372         return self.channel
    373 
    374     def setChannel(self, channel):
    375         self.channel = channel
    376 
    377 
    378     def getTokenIndex(self):
    379         return self.index
    380 
    381     def setTokenIndex(self, index):
    382         self.index = index
    383 
    384 
    385     def getInputStream(self):
    386         return None
    387 
    388     def setInputStream(self, input):
    389         pass
    390 
    391 
    392     def toString(self):
    393         channelStr = ""
    394         if self.channel > 0:
    395             channelStr = ",channel=" + str(self.channel)
    396 
    397         txt = self.text
    398         if txt is None:
    399             txt = "<no text>"
    400 
    401         return "[@%r,%r,<%r>%s,%r:%r]" % (self.index,
    402                                           txt,
    403                                           self.type,
    404                                           channelStr,
    405                                           self.line,
    406                                           self.charPositionInLine
    407                                           )
    408 
    409 
    410     __str__ = toString
    411     __repr__ = toString
    412 
    413 
    414 INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
    415 
    416 # In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
    417 # will avoid creating a token for this symbol and try to fetch another.
    418 SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
    419