Home | History | Annotate | Download | only in antlr3
      1 """ANTLR3 runtime package"""
      2 
      3 # begin[licence]
      4 #
      5 # [The "BSD licence"]
      6 # Copyright (c) 2005-2008 Terence Parr
      7 # All rights reserved.
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 3. The name of the author may not be used to endorse or promote products
     18 #    derived from this software without specific prior written permission.
     19 #
     20 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     21 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     22 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     23 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     24 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     25 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     29 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 # end[licensc]
     32 
     33 from antlr3.constants import EOF
     34 from antlr3.exceptions import NoViableAltException, BacktrackingFailed
     35 
     36 
     37 class DFA(object):
     38     """@brief A DFA implemented as a set of transition tables.
     39 
     40     Any state that has a semantic predicate edge is special; those states
     41     are generated with if-then-else structures in a specialStateTransition()
     42     which is generated by cyclicDFA template.
     43     
     44     """
     45     
     46     def __init__(
     47         self,
     48         recognizer, decisionNumber,
     49         eot, eof, min, max, accept, special, transition
     50         ):
     51         ## Which recognizer encloses this DFA?  Needed to check backtracking
     52         self.recognizer = recognizer
     53 
     54         self.decisionNumber = decisionNumber
     55         self.eot = eot
     56         self.eof = eof
     57         self.min = min
     58         self.max = max
     59         self.accept = accept
     60         self.special = special
     61         self.transition = transition
     62 
     63 
     64     def predict(self, input):
     65         """
     66         From the input stream, predict what alternative will succeed
     67 	using this DFA (representing the covering regular approximation
     68 	to the underlying CFL).  Return an alternative number 1..n.  Throw
     69 	 an exception upon error.
     70 	"""
     71         mark = input.mark()
     72         s = 0 # we always start at s0
     73         try:
     74             for _ in xrange(50000):
     75                 #print "***Current state = %d" % s
     76                 
     77                 specialState = self.special[s]
     78                 if specialState >= 0:
     79                     #print "is special"
     80                     s = self.specialStateTransition(specialState, input)
     81                     if s == -1:
     82                         self.noViableAlt(s, input)
     83                         return 0
     84                     input.consume()
     85                     continue
     86 
     87                 if self.accept[s] >= 1:
     88                     #print "accept state for alt %d" % self.accept[s]
     89                     return self.accept[s]
     90 
     91                 # look for a normal char transition
     92                 c = input.LA(1)
     93 
     94                 #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF')
     95                 #print "range = %d..%d" % (self.min[s], self.max[s])
     96 
     97                 if c >= self.min[s] and c <= self.max[s]:
     98                     # move to next state
     99                     snext = self.transition[s][c-self.min[s]]
    100                     #print "in range, next state = %d" % snext
    101                     
    102                     if snext < 0:
    103                         #print "not a normal transition"
    104                         # was in range but not a normal transition
    105                         # must check EOT, which is like the else clause.
    106                         # eot[s]>=0 indicates that an EOT edge goes to another
    107                         # state.
    108                         if self.eot[s] >= 0: # EOT Transition to accept state?
    109                             #print "EOT trans to accept state %d" % self.eot[s]
    110                             
    111                             s = self.eot[s]
    112                             input.consume()
    113                             # TODO: I had this as return accept[eot[s]]
    114                             # which assumed here that the EOT edge always
    115                             # went to an accept...faster to do this, but
    116                             # what about predicated edges coming from EOT
    117                             # target?
    118                             continue
    119 
    120                         #print "no viable alt"
    121                         self.noViableAlt(s, input)
    122                         return 0
    123 
    124                     s = snext
    125                     input.consume()
    126                     continue
    127 
    128                 if self.eot[s] >= 0:
    129                     #print "EOT to %d" % self.eot[s]
    130                     
    131                     s = self.eot[s]
    132                     input.consume()
    133                     continue
    134 
    135                 # EOF Transition to accept state?
    136                 if c == EOF and self.eof[s] >= 0:
    137                     #print "EOF Transition to accept state %d" \
    138                     #  % self.accept[self.eof[s]]
    139                     return self.accept[self.eof[s]]
    140 
    141                 # not in range and not EOF/EOT, must be invalid symbol
    142                 self.noViableAlt(s, input)
    143                 return 0
    144 
    145             else:
    146                 raise RuntimeError("DFA bang!")
    147             
    148         finally:
    149             input.rewind(mark)
    150 
    151 
    152     def noViableAlt(self, s, input):
    153         if self.recognizer._state.backtracking > 0:
    154             raise BacktrackingFailed
    155 
    156         nvae = NoViableAltException(
    157             self.getDescription(),
    158             self.decisionNumber,
    159             s,
    160             input
    161             )
    162 
    163         self.error(nvae)
    164         raise nvae
    165 
    166 
    167     def error(self, nvae):
    168         """A hook for debugging interface"""
    169         pass
    170 
    171 
    172     def specialStateTransition(self, s, input):
    173         return -1
    174 
    175 
    176     def getDescription(self):
    177         return "n/a"
    178 
    179 
    180 ##     def specialTransition(self, state, symbol):
    181 ##         return 0
    182 
    183 
    184     def unpack(cls, string):
    185         """@brief Unpack the runlength encoded table data.
    186 
    187         Terence implemented packed table initializers, because Java has a
    188         size restriction on .class files and the lookup tables can grow
    189         pretty large. The generated JavaLexer.java of the Java.g example
    190         would be about 15MB with uncompressed array initializers.
    191 
    192         Python does not have any size restrictions, but the compilation of
    193         such large source files seems to be pretty memory hungry. The memory
    194         consumption of the python process grew to >1.5GB when importing a
    195         15MB lexer, eating all my swap space and I was to impacient to see,
    196         if it could finish at all. With packed initializers that are unpacked
    197         at import time of the lexer module, everything works like a charm.
    198         
    199         """
    200         
    201         ret = []
    202         for i in range(len(string) / 2):
    203             (n, v) = ord(string[i*2]), ord(string[i*2+1])
    204 
    205             # Is there a bitwise operation to do this?
    206             if v == 0xFFFF:
    207                 v = -1
    208 
    209             ret += [v] * n
    210 
    211         return ret
    212     
    213     unpack = classmethod(unpack)
    214