      1 """ANTLR3 runtime package"""
      2 
      3 # begin[licence]
      4 #
      5 # [The "BSD licence"]
      6 # Copyright (c) 2005-2012 Terence Parr
      7 # All rights reserved.
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 3. The name of the author may not be used to endorse or promote products
     18 #    derived from this software without specific prior written permission.
     19 #
     20 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     21 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     22 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     23 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     24 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     25 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     29 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 # end[licence]
     32 
     33 from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE
     34 
     35 ############################################################################
     36 #
     37 # basic token interface
     38 #
     39 ############################################################################
     40 
class Token(object):
    """@brief Abstract token base class."""

    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"
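
    # Illustrative usage sketch (the token types and names below are made
    # up for the example, not taken from any real grammar):
    #
    #   Token.registerTokenNamesMap({4: 'ID', 5: 'INT'})
    #   CommonToken(type=5, text='42').typeName   # -> 'INT'
    #   CommonToken(type=99).typeName             # -> 'INVALID_TOKEN_TYPE'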

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and setter.
    @property
    def text(self):
        return self._text

    @text.setter
    def text(self, value):
        self._text = value


    @property
    def type(self):
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        return self._type

    @property
    def typeName(self):
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value


    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos


    @property
    def channel(self):
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value


    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value


    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement this, but it's nice to know where a
        Token comes from if you have include files etc. on the input."""

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement this, but it's nice to know where a
        Token comes from if you have include files etc. on the input."""

        raise NotImplementedError


############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################

class CommonToken(Token):
    """@brief Basic token implementation.

    This implementation does not copy the text from the input stream upon
    creation, but keeps start/stop pointers into the stream to avoid
    unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):

        if oldToken:
            super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                             oldToken.index, oldToken.line,
                             oldToken.charPositionInLine, oldToken.input)
            if isinstance(oldToken, CommonToken):
                self.start = oldToken.start
                self.stop = oldToken.stop
            else:
                self.start = start
                self.stop = stop

        else:
            super().__init__(type=type, channel=channel, input=input)

            # We need to be able to change the text once in a while.  If
            # this is not None, the text property returns it instead of the
            # input substring.  Note that start/stop are not affected by
            # changing this.
            self._text = text

            # The char position into the input buffer where this token starts
            self.start = start

            # The char position into the input buffer where this token stops
            # This is the index of the last char, *not* the index after it!
            self.stop = stop


    @property
    def text(self):
        # Could be the empty string, and we want to return that.
        if self._text is not None:
            return self._text

        if not self.input:
            return None

        if self.start < self.input.size() and self.stop < self.input.size():
            return self.input.substring(self.start, self.stop)

        return '<EOF>'

    @text.setter
    def text(self, value):
        """
        Override the text for this token.  The text property will return
        this text rather than pulling from the buffer.  Note that this does
        not invalidate the start/stop indexes; it means that the input was
        converted to a new string in the token object.
        """
        self._text = value


    def getInputStream(self):
        return self.input

    def setInputStream(self, input):
        self.input = input


    def __str__(self):
        if self.type == EOF:
            return "<EOF>"

        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt:
            # Escape newlines, carriage returns and tabs (the replacement
            # inserts two backslashes) so the text stays on a single line
            # in the debug string.
            txt = txt.replace("\n", r"\\n")
            txt = txt.replace("\r", r"\\r")
            txt = txt.replace("\t", r"\\t")
        else:
            txt = "<no text>"

        return ("[@{0.index},{0.start}:{0.stop}={txt!r},"
                "<{0.typeName}>{channelStr},"
                "{0.line}:{0.charPositionInLine}]"
                .format(self, txt=txt, channelStr=channelStr))

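
# A minimal sketch of the lazy-text behaviour described above.  Illustrative
# only: _FakeStream is a made-up stand-in for a real ANTLR character stream;
# the text property only needs size() and substring(start, stop), with stop
# treated as inclusive, which is how the real streams behave.
#
#   class _FakeStream:
#       def __init__(self, data): self.data = data
#       def size(self): return len(self.data)
#       def substring(self, start, stop): return self.data[start:stop + 1]
#
#   tok = CommonToken(type=4, input=_FakeStream("hello world"), start=0, stop=4)
#   tok.text            # -> 'hello', pulled from the stream on demand
#   tok.text = 'HELLO'  # override; start/stop stay untouched
#   tok.text            # -> 'HELLO'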

class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; has an actual string created
    and associated with this object.  These objects are needed for imaginary
    tree nodes that have payload objects.  We need to create a Token object
    that has a string; the tree node will point at this token.  CommonToken
    has indexes into a char stream and hence cannot be used to introduce
    new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        if oldToken:
            super().__init__(type=oldToken.type, channel=oldToken.channel,
                             text=oldToken.text, line=oldToken.line,
                             charPositionInLine=oldToken.charPositionInLine)

        else:
            super().__init__(type=type, channel=channel, text=text,
                             index=None, line=None, charPositionInLine=None)


    def getInputStream(self):
        return None

    def setInputStream(self, input):
        pass


    def toString(self):
        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if not txt:
            txt = "<no text>"

        return ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr},"
                "{0.line!r}:{0.charPositionInLine!r}]"
                .format(self, txt=txt, channelStr=channelStr))

    __str__ = toString
    __repr__ = toString

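
# Illustrative sketch of the use case from the ClassicToken docstring: an
# imaginary token that carries its own string and has no character stream
# behind it (the token type 100 and the name 'BLOCK' are made up):
#
#   imaginary = ClassicToken(type=100, text='BLOCK')
#   imaginary.text              # -> 'BLOCK'
#   imaginary.getInputStream()  # -> None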

INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
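
# Illustrative sketch of the skip mechanism above (not a verbatim grammar:
# the rule name WS is made up, and it assumes the lexer exposes a skip()
# helper elsewhere in this runtime that assigns SKIP_TOKEN for you):
#
#   WS : (' ' | '\t' | '\r' | '\n')+ { self.skip() } ;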
    311