Home | History | Annotate | Download | only in pyxelator
      1 #!/usr/bin/env python
      2 """ cdecl.py - parse c declarations
      3 
      4 (c) 2002, 2003, 2004, 2005 Simon Burton <simon (at] arrowtheory.com>
      5 Released under GNU LGPL license.
      6 
      7 version 0.xx
      8 
      9 """
     10 
     11 import sys
     12 import string
     13 import types
     14 import copy
     15 
     16 #from cparse import BasicType, Qualifier, StorageClass, Typedef, Ellipses, GCCBuiltin
     17 #from cparse import *
     18 
     19 import cparse as host
     20 
     21 class LexError(Exception):
     22   pass
     23 
     24 class Lexer(object):
     25   def __init__(self,s="",verbose=0,**kw):
     26     self.verbose = verbose
     27     self.lookup = {} # a map for keywords and typedefs
     28     for t in \
     29       "float double void char int".split():
     30       self.lookup[t] = host.BasicType( t )
     31     for t in \
     32       "register signed unsigned short long const volatile inline".split(): # inline here ???
     33       self.lookup[t] = host.Qualifier( t )
     34     for t in "extern static auto".split():
     35       self.lookup[t] = host.StorageClass( t )
     36     self.lookup['typedef'] = host.Typedef()
     37     #self.lookup['__inline__'] = host.GCCBuiltin('__inline__')
     38     #self.lookup['__extension__'] = host.Qualifier('__extension__')
     39     self.lookup['...'] = host.Ellipses()
     40     if s:
     41       self.lex(s)
     42     for key in kw.keys():
     43       self.__dict__[key] = kw[key]
     44 
     45   def lex(self,s):
     46     self.stack = None
     47     self.lines = s.splitlines()
     48     self.set_state("","",0,0)
     49     self.so_file = ""
     50     self._newline()
     51     self.get_token() # start
     52 
     53   def mktypedef(self,tok,node):
     54     if self.verbose:
     55       print "%s.mktypedef(%s,%s)"%(self,tok,node)
     56     self.lookup[ tok ] = node
     57 
     58   def rmtypedef(self,tok):
     59     " used in round trip testing "
     60 #    print "# rmtypedef(%s)"%tok
     61     assert isinstance( self.lookup[ tok ], host.Node ) # existance
     62     del self.lookup[ tok ]
     63 
     64   def _get_kind(self,tok):
     65     #print '_get_kind(%s)'%tok,self.lookup
     66     try:
     67       return self.lookup[tok]
     68       #return self.lookup[tok].clone()
     69     except KeyError:
     70       if tok.startswith("__builtin"):
     71         node = host.GCCBuiltin(tok)
     72         self.lookup[tok] = node
     73         return node
     74       #elif tok in ( "__extension__", ):
     75         #node = GCCBuiltin(tok)
     76         #self.lookup[tok] = node
     77         #return node
     78       return None
     79 
     80   def _newline(self):
     81     while self.lno < len(self.lines):
     82       line = self.lines[self.lno]
     83       if not line or line[0] != "#":
     84         break
     85       l = line.split('"')
     86       assert len(l)>=2
     87       self.so_file = l[1]
     88       #self.so_lno = int( l[0].split()[1] )
     89       #sys.stderr.write("# %s %s: %s\n"%(so_lno,so_file,l))
     90       self.lno+=1
     91 
     92   def get_brace_token( self ):
     93     self.push_state()
     94     ident_chars0 = string.letters+"_"
     95     ident_chars1 = string.letters+string.digits+"_"
     96     tok, kind = "", ""
     97     while self.lno < len(self.lines):
     98       s = self.lines[self.lno]
     99       i=self.col
    100       while i < len(s):
    101         if s[i] not in '{}':
    102           i=i+1
    103           continue
    104         else:
    105           tok = s[i]
    106           kind = tok
    107           self.col = i+1
    108           break
    109         # keep moving
    110         #sys.stderr.write( "lexer ignoring '%s'\n"%s[i] )
    111         i=i+1
    112       if i==len(s):
    113         # nothing found
    114         assert tok == ""
    115         self.col=0
    116         self.lno+=1
    117         self._newline()
    118       else:
    119         assert tok
    120         break
    121     self.set_state(tok,kind,self.lno,self.col)
    122 
    123   def get_token(self):
    124     self.push_state()
    125     ident_chars0 = string.letters+"_"
    126     ident_chars1 = string.letters+string.digits+"_"
    127     tok, kind = "", ""
    128     while self.lno < len(self.lines):
    129       s = self.lines[self.lno]
    130       i=self.col
    131       while i < len(s):
    132         if s[i].isspace():
    133           i=i+1
    134           continue
    135         #if s[i] in ident_chars0:
    136         if s[i].isalpha() or s[i]=='_':
    137           # identifier
    138           j=i+1
    139           while j<len(s):
    140             if s[j] in ident_chars1:
    141               j=j+1
    142             else:
    143               break
    144           tok = s[i:j]
    145           self.col = j
    146           kind = self._get_kind(tok)
    147           break
    148         if s[i].isdigit() or \
    149             (i+1<len(s) and s[i] in '+-.' and s[i+1].isdigit()):
    150           # number literal
    151           is_float = s[i]=='.'
    152           is_hex = s[i:i+2]=='0x'
    153           if is_hex:
    154             i=i+2
    155             assert s[i].isdigit() or s[i] in "abcdefABCDEF", self.err_string()
    156           j=i+1
    157           while j<len(s):
    158             #print "lex ",repr(s[i]),is_float
    159             if s[j].isdigit() or (is_hex and s[j] in "abcdefABCDEF"):
    160               j=j+1
    161             elif s[j]=='.' and not is_float:
    162               assert not is_hex
    163               j=j+1
    164               is_float=1
    165             else:
    166               break 
    167           tok = s[i:j]
    168           self.col = j
    169           if is_float:
    170             kind = float(tok)
    171           elif is_hex:
    172             kind = int(tok,16)
    173           else:
    174             kind = int(tok)
    175           break
    176         if s[i:i+3]=='...':
    177           # ellipses
    178           #sys.stderr.write( "ELLIPSES "+str(self.get_state()) )
    179           tok = s[i:i+3]
    180           kind = self._get_kind(tok)
    181           self.col = i+3
    182           break
    183         if s[i] in '*/{}()[]:;,=+-~.<>|&':
    184           tok = s[i]
    185           kind = tok
    186           self.col = i+1
    187           break
    188         if s[i] == "'":
    189           j = i+2
    190           while j<len(s) and s[j]!="'":
    191             j+=1
    192           if j==len(s):
    193             raise LexError( self.err_string() + "unterminated char constant" )
    194           tok = s[i:j+1]
    195           self.col = j+1
    196           kind = s[i:j+1]
    197           break
    198         # keep moving
    199         #sys.stderr.write( "lexer ignoring '%s'\n"%s[i] )
    200         sys.stderr.write( "lexer ignoring '%s' lno=%d\n"%(s[i],self.lno+1) )
    201         i=i+1
    202         # end while i < len(s)
    203       if i==len(s):
    204         # nothing found, go to next line
    205         assert tok == ""
    206         self.col=0
    207         self.lno+=1
    208         self._newline()
    209       else:
    210         # we got one
    211         assert tok
    212         break
    213       # end while self.lno < len(self.lines):
    214     self.set_state(tok,kind,self.lno,self.col)
    215 
    216   def err_string(self):
    217     "Return helpful error string :)"
    218     return self.lines[self.lno]+"\n"+" "*self.col+"^\n"
    219 
    220   def push_state(self):
    221     self.stack = self.get_state() # a short stack :)
    222     #self.stack.push( self.get_state() )
    223 
    224   def unget_token(self):
    225     assert self.stack is not None
    226     self.set_state(*self.stack)
    227     self.stack = None
    228 
    229   def set_state(self,tok,kind,lno,col):
    230     if self.verbose:
    231       print "tok,kind,lno,col = ",(tok,kind,lno,col)
    232     self.tok = tok
    233     self.kind = kind
    234     self.lno = lno # line
    235     self.col = col # column
    236 
    237   def get_state(self):
    238     return self.tok,self.kind,self.lno,self.col
    239 
    240   def get_file(self):
    241     return self.so_file
    242 
    243 ###################################################################
    244 #
    245 ###################################################################
    246 #
    247 
    248 
    249