Home | History | Annotate | Download | only in doc
      1 #!/usr/bin/python -u
      2 #
# This is the API builder: it parses the C sources and builds the
# formal description of the API in XML.
      5 #
      6 # See Copyright for the status of this software.
      7 #
      8 # daniel (at] veillard.com
      9 #
     10 import os, sys
     11 import string
     12 import glob
     13 
# Global debug switch (not read anywhere in the visible part of this file).
debug=0
# Name of one symbol to trace: the identifier and index classes print a line
# whenever a symbol with exactly this name is defined, updated or registered.
# Leave it at None to disable tracing.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None
     17 
     18 #
     19 # C parser analysis code
     20 #
# Table of source files that must not be analysed.  Each key is a file name
# (or file name fragment) and each value is a short human readable reason.
# NOTE(review): the code that consults this table is outside this view;
# presumably keys are matched against file paths - confirm before renaming
# an entry (e.g. the leading '/' in "/save.h" looks deliberate).
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testHTML.c": "test tool",
  "testReader.c": "test tool",
  "testSchemas.c": "test tool",
  "testXPath.c": "test tool",
  "testAutomata.c": "test tool",
  "testModule.c": "test tool",
  "testRegexp.c": "test tool",
  "testThreads.c": "test tool",
  "testC14N.c": "test tool",
  "testRelax.c": "test tool",
  "testThreadsWin32.c": "test tool",
  "testSAX.c": "test tool",
  "testURI.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "xzlib.h": "Internal API only 2.8.0",
  "buf.h": "Internal API only 2.9.0",
  "enc.h": "Internal API only 2.9.0",
  "/save.h": "Internal API only 2.9.0",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
}
     61 
# Table of C identifiers (mostly platform/compiler decoration macros) that
# the parser should ignore.  Each value is a (count, reason) tuple where
# reason is a short human readable explanation.
# NOTE(review): the consumer of this table is outside this view; count is
# presumably the number of tokens following the word that must be dropped
# as well (e.g. the parenthesised arguments of __declspec) - confirm.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "XMLCDECL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "__XML_EXTERNC": (0, "Special macro added for os400"),
}
     86 
     87 def escape(raw):
     88     raw = raw.replace('&', '&')
     89     raw = raw.replace('<', '&lt;')
     90     raw = raw.replace('>', '&gt;')
     91     raw = raw.replace("'", '&apos;')
     92     raw = raw.replace('"', '&quot;')
     93     return raw
     94 
     95 def uniq(items):
     96     d = {}
     97     for item in items:
     98         d[item]=1
     99     return list(d.keys())
    100 
    101 class identifier:
    102     def __init__(self, name, header=None, module=None, type=None, lineno = 0,
    103                  info=None, extra=None, conditionals = None):
    104         self.name = name
    105         self.header = header
    106         self.module = module
    107         self.type = type
    108         self.info = info
    109         self.extra = extra
    110         self.lineno = lineno
    111         self.static = 0
    112         if conditionals == None or len(conditionals) == 0:
    113             self.conditionals = None
    114         else:
    115             self.conditionals = conditionals[:]
    116         if self.name == debugsym:
    117             print("=> define %s : %s" % (debugsym, (module, type, info,
    118                                          extra, conditionals)))
    119 
    120     def __repr__(self):
    121         r = "%s %s:" % (self.type, self.name)
    122         if self.static:
    123             r = r + " static"
    124         if self.module != None:
    125             r = r + " from %s" % (self.module)
    126         if self.info != None:
    127             r = r + " " +  repr(self.info)
    128         if self.extra != None:
    129             r = r + " " + repr(self.extra)
    130         if self.conditionals != None:
    131             r = r + " " + repr(self.conditionals)
    132         return r
    133 
    134 
    135     def set_header(self, header):
    136         self.header = header
    137     def set_module(self, module):
    138         self.module = module
    139     def set_type(self, type):
    140         self.type = type
    141     def set_info(self, info):
    142         self.info = info
    143     def set_extra(self, extra):
    144         self.extra = extra
    145     def set_lineno(self, lineno):
    146         self.lineno = lineno
    147     def set_static(self, static):
    148         self.static = static
    149     def set_conditionals(self, conditionals):
    150         if conditionals == None or len(conditionals) == 0:
    151             self.conditionals = None
    152         else:
    153             self.conditionals = conditionals[:]
    154 
    155     def get_name(self):
    156         return self.name
    157     def get_header(self):
    158         return self.module
    159     def get_module(self):
    160         return self.module
    161     def get_type(self):
    162         return self.type
    163     def get_info(self):
    164         return self.info
    165     def get_lineno(self):
    166         return self.lineno
    167     def get_extra(self):
    168         return self.extra
    169     def get_static(self):
    170         return self.static
    171     def get_conditionals(self):
    172         return self.conditionals
    173 
    174     def update(self, header, module, type = None, info = None, extra=None,
    175                conditionals=None):
    176         if self.name == debugsym:
    177             print("=> update %s : %s" % (debugsym, (module, type, info,
    178                                          extra, conditionals)))
    179         if header != None and self.header == None:
    180             self.set_header(module)
    181         if module != None and (self.module == None or self.header == self.module):
    182             self.set_module(module)
    183         if type != None and self.type == None:
    184             self.set_type(type)
    185         if info != None:
    186             self.set_info(info)
    187         if extra != None:
    188             self.set_extra(extra)
    189         if conditionals != None:
    190             self.set_conditionals(conditionals)
    191 
    192 class index:
    193     def __init__(self, name = "noname"):
    194         self.name = name
    195         self.identifiers = {}
    196         self.functions = {}
    197         self.variables = {}
    198         self.includes = {}
    199         self.structs = {}
    200         self.enums = {}
    201         self.typedefs = {}
    202         self.macros = {}
    203         self.references = {}
    204         self.info = {}
    205 
    206     def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
    207         if name[0:2] == '__':
    208             return None
    209         d = None
    210         try:
    211            d = self.identifiers[name]
    212            d.update(header, module, type, lineno, info, extra, conditionals)
    213         except:
    214            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
    215            self.identifiers[name] = d
    216 
    217         if d != None and static == 1:
    218             d.set_static(1)
    219 
    220         if d != None and name != None and type != None:
    221             self.references[name] = d
    222 
    223         if name == debugsym:
    224             print("New ref: %s" % (d))
    225 
    226         return d
    227 
    228     def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
    229         if name[0:2] == '__':
    230             return None
    231         d = None
    232         try:
    233            d = self.identifiers[name]
    234            d.update(header, module, type, lineno, info, extra, conditionals)
    235         except:
    236            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
    237            self.identifiers[name] = d
    238 
    239         if d != None and static == 1:
    240             d.set_static(1)
    241 
    242         if d != None and name != None and type != None:
    243             if type == "function":
    244                 self.functions[name] = d
    245             elif type == "functype":
    246                 self.functions[name] = d
    247             elif type == "variable":
    248                 self.variables[name] = d
    249             elif type == "include":
    250                 self.includes[name] = d
    251             elif type == "struct":
    252                 self.structs[name] = d
    253             elif type == "enum":
    254                 self.enums[name] = d
    255             elif type == "typedef":
    256                 self.typedefs[name] = d
    257             elif type == "macro":
    258                 self.macros[name] = d
    259             else:
    260                 print("Unable to register type ", type)
    261 
    262         if name == debugsym:
    263             print("New symbol: %s" % (d))
    264 
    265         return d
    266 
    267     def merge(self, idx):
    268         for id in list(idx.functions.keys()):
    269               #
    270               # macro might be used to override functions or variables
    271               # definitions
    272               #
    273              if id in self.macros:
    274                  del self.macros[id]
    275              if id in self.functions:
    276                  print("function %s from %s redeclared in %s" % (
    277                     id, self.functions[id].header, idx.functions[id].header))
    278              else:
    279                  self.functions[id] = idx.functions[id]
    280                  self.identifiers[id] = idx.functions[id]
    281         for id in list(idx.variables.keys()):
    282               #
    283               # macro might be used to override functions or variables
    284               # definitions
    285               #
    286              if id in self.macros:
    287                  del self.macros[id]
    288              if id in self.variables:
    289                  print("variable %s from %s redeclared in %s" % (
    290                     id, self.variables[id].header, idx.variables[id].header))
    291              else:
    292                  self.variables[id] = idx.variables[id]
    293                  self.identifiers[id] = idx.variables[id]
    294         for id in list(idx.structs.keys()):
    295              if id in self.structs:
    296                  print("struct %s from %s redeclared in %s" % (
    297                     id, self.structs[id].header, idx.structs[id].header))
    298              else:
    299                  self.structs[id] = idx.structs[id]
    300                  self.identifiers[id] = idx.structs[id]
    301         for id in list(idx.typedefs.keys()):
    302              if id in self.typedefs:
    303                  print("typedef %s from %s redeclared in %s" % (
    304                     id, self.typedefs[id].header, idx.typedefs[id].header))
    305              else:
    306                  self.typedefs[id] = idx.typedefs[id]
    307                  self.identifiers[id] = idx.typedefs[id]
    308         for id in list(idx.macros.keys()):
    309               #
    310               # macro might be used to override functions or variables
    311               # definitions
    312               #
    313              if id in self.variables:
    314                  continue
    315              if id in self.functions:
    316                  continue
    317              if id in self.enums:
    318                  continue
    319              if id in self.macros:
    320                  print("macro %s from %s redeclared in %s" % (
    321                     id, self.macros[id].header, idx.macros[id].header))
    322              else:
    323                  self.macros[id] = idx.macros[id]
    324                  self.identifiers[id] = idx.macros[id]
    325         for id in list(idx.enums.keys()):
    326              if id in self.enums:
    327                  print("enum %s from %s redeclared in %s" % (
    328                     id, self.enums[id].header, idx.enums[id].header))
    329              else:
    330                  self.enums[id] = idx.enums[id]
    331                  self.identifiers[id] = idx.enums[id]
    332 
    333     def merge_public(self, idx):
    334         for id in list(idx.functions.keys()):
    335              if id in self.functions:
    336                  # check that function condition agrees with header
    337                  if idx.functions[id].conditionals != \
    338                     self.functions[id].conditionals:
    339                      print("Header condition differs from Function for %s:" \
    340                         % id)
    341                      print("  H: %s" % self.functions[id].conditionals)
    342                      print("  C: %s" % idx.functions[id].conditionals)
    343                  up = idx.functions[id]
    344                  self.functions[id].update(None, up.module, up.type, up.info, up.extra)
    345          #     else:
    346          #         print "Function %s from %s is not declared in headers" % (
    347          #                id, idx.functions[id].module)
    348          # TODO: do the same for variables.
    349 
    350     def analyze_dict(self, type, dict):
    351         count = 0
    352         public = 0
    353         for name in list(dict.keys()):
    354             id = dict[name]
    355             count = count + 1
    356             if id.static == 0:
    357                 public = public + 1
    358         if count != public:
    359             print("  %d %s , %d public" % (count, type, public))
    360         elif count != 0:
    361             print("  %d public %s" % (count, type))
    362 
    363 
    364     def analyze(self):
    365         self.analyze_dict("functions", self.functions)
    366         self.analyze_dict("variables", self.variables)
    367         self.analyze_dict("structs", self.structs)
    368         self.analyze_dict("typedefs", self.typedefs)
    369         self.analyze_dict("macros", self.macros)
    370 
    371 class CLexer:
    372     """A lexer for the C language, tokenize the input by reading and
    373        analyzing it line by line"""
    374     def __init__(self, input):
    375         self.input = input
    376         self.tokens = []
    377         self.line = ""
    378         self.lineno = 0
    379 
    380     def getline(self):
    381         line = ''
    382         while line == '':
    383             line = self.input.readline()
    384             if not line:
    385                 return None
    386             self.lineno = self.lineno + 1
    387             line = line.lstrip()
    388             line = line.rstrip()
    389             if line == '':
    390                 continue
    391             while line[-1] == '\\':
    392                 line = line[:-1]
    393                 n = self.input.readline()
    394                 self.lineno = self.lineno + 1
    395                 n = n.lstrip()
    396                 n = n.rstrip()
    397                 if not n:
    398                     break
    399                 else:
    400                     line = line + n
    401         return line
    402 
    403     def getlineno(self):
    404         return self.lineno
    405 
    406     def push(self, token):
    407         self.tokens.insert(0, token);
    408 
    409     def debug(self):
    410         print("Last token: ", self.last)
    411         print("Token queue: ", self.tokens)
    412         print("Line %d end: " % (self.lineno), self.line)
    413 
    414     def token(self):
    415         while self.tokens == []:
    416             if self.line == "":
    417                 line = self.getline()
    418             else:
    419                 line = self.line
    420                 self.line = ""
    421             if line == None:
    422                 return None
    423 
    424             if line[0] == '#':
    425                 self.tokens = list(map((lambda x: ('preproc', x)),
    426                                   line.split()))
    427                 break;
    428             l = len(line)
    429             if line[0] == '"' or line[0] == "'":
    430                 end = line[0]
    431                 line = line[1:]
    432                 found = 0
    433                 tok = ""
    434                 while found == 0:
    435                     i = 0
    436                     l = len(line)
    437                     while i < l:
    438                         if line[i] == end:
    439                             self.line = line[i+1:]
    440                             line = line[:i]
    441                             l = i
    442                             found = 1
    443                             break
    444                         if line[i] == '\\':
    445                             i = i + 1
    446                         i = i + 1
    447                     tok = tok + line
    448                     if found == 0:
    449                         line = self.getline()
    450                         if line == None:
    451                             return None
    452                 self.last = ('string', tok)
    453                 return self.last
    454 
    455             if l >= 2 and line[0] == '/' and line[1] == '*':
    456                 line = line[2:]
    457                 found = 0
    458                 tok = ""
    459                 while found == 0:
    460                     i = 0
    461                     l = len(line)
    462                     while i < l:
    463                         if line[i] == '*' and i+1 < l and line[i+1] == '/':
    464                             self.line = line[i+2:]
    465                             line = line[:i-1]
    466                             l = i
    467                             found = 1
    468                             break
    469                         i = i + 1
    470                     if tok != "":
    471                         tok = tok + "\n"
    472                     tok = tok + line
    473                     if found == 0:
    474                         line = self.getline()
    475                         if line == None:
    476                             return None
    477                 self.last = ('comment', tok)
    478                 return self.last
    479             if l >= 2 and line[0] == '/' and line[1] == '/':
    480                 line = line[2:]
    481                 self.last = ('comment', line)
    482                 return self.last
    483             i = 0
    484             while i < l:
    485                 if line[i] == '/' and i+1 < l and line[i+1] == '/':
    486                     self.line = line[i:]
    487                     line = line[:i]
    488                     break
    489                 if line[i] == '/' and i+1 < l and line[i+1] == '*':
    490                     self.line = line[i:]
    491                     line = line[:i]
    492                     break
    493                 if line[i] == '"' or line[i] == "'":
    494                     self.line = line[i:]
    495                     line = line[:i]
    496                     break
    497                 i = i + 1
    498             l = len(line)
    499             i = 0
    500             while i < l:
    501                 if line[i] == ' ' or line[i] == '\t':
    502                     i = i + 1
    503                     continue
    504                 o = ord(line[i])
    505                 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
    506                    (o >= 48 and o <= 57):
    507                     s = i
    508                     while i < l:
    509                         o = ord(line[i])
    510                         if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
    511                            (o >= 48 and o <= 57) or \
    512 			   (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1:
    513                             i = i + 1
    514                         else:
    515                             break
    516                     self.tokens.append(('name', line[s:i]))
    517                     continue
    518                 if "(){}:;,[]".find(line[i]) != -1:
    519 #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
    520 #                    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
    521 #                    line[i] == ',' or line[i] == '[' or line[i] == ']':
    522                     self.tokens.append(('sep', line[i]))
    523                     i = i + 1
    524                     continue
    525                 if "+-*><=/%&!|.".find(line[i]) != -1:
    526 #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
    527 #                    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
    528 #                    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
    529 #                    line[i] == '!' or line[i] == '|' or line[i] == '.':
    530                     if line[i] == '.' and  i + 2 < l and \
    531                        line[i+1] == '.' and line[i+2] == '.':
    532                         self.tokens.append(('name', '...'))
    533                         i = i + 3
    534                         continue
    535 
    536                     j = i + 1
    537                     if j < l and (
    538                        "+-*><=/%&!|".find(line[j]) != -1):
    539 #                        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
    540 #                        line[j] == '>' or line[j] == '<' or line[j] == '=' or \
    541 #                        line[j] == '/' or line[j] == '%' or line[j] == '&' or \
    542 #                        line[j] == '!' or line[j] == '|'):
    543                         self.tokens.append(('op', line[i:j+1]))
    544                         i = j + 1
    545                     else:
    546                         self.tokens.append(('op', line[i]))
    547                         i = i + 1
    548                     continue
    549                 s = i
    550                 while i < l:
    551                     o = ord(line[i])
    552                     if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
    553                        (o >= 48 and o <= 57) or (
    554                         " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1):
    555 #                         line[i] != ' ' and line[i] != '\t' and
    556 #                         line[i] != '(' and line[i] != ')' and
    557 #                         line[i] != '{'  and line[i] != '}' and
    558 #                         line[i] != ':' and line[i] != ';' and
    559 #                         line[i] != ',' and line[i] != '+' and
    560 #                         line[i] != '-' and line[i] != '*' and
    561 #                         line[i] != '/' and line[i] != '%' and
    562 #                         line[i] != '&' and line[i] != '!' and
    563 #                         line[i] != '|' and line[i] != '[' and
    564 #                         line[i] != ']' and line[i] != '=' and
    565 #                         line[i] != '*' and line[i] != '>' and
    566 #                         line[i] != '<'):
    567                         i = i + 1
    568                     else:
    569                         break
    570                 self.tokens.append(('name', line[s:i]))
    571 
    572         tok = self.tokens[0]
    573         self.tokens = self.tokens[1:]
    574         self.last = tok
    575         return tok
    576 
    577 class CParser:
    578     """The C module parser"""
    def __init__(self, filename, idx = None):
        """Open filename and prepare to parse it.

        Symbols are recorded in idx when one is given, otherwise in a
        fresh index.  The file is opened here and handed to a CLexer.
        """
        self.filename = filename
        # A header is a file whose name ends in ".h" and is longer than
        # that suffix alone.
        is_header = len(filename) > 2 and filename.endswith('.h')
        self.is_header = 1 if is_header else 0
        self.input = open(filename)
        self.lexer = CLexer(self.input)
        self.index = index() if idx is None else idx
        # comment bookkeeping
        self.top_comment = ""
        self.last_comment = ""
        self.comment = None
        # parser state flags and stacks
        self.collect_ref = 0
        self.no_error = 0
        self.conditionals = []
        self.defines = []
    598 
    def collect_references(self):
        """Set the collect_ref flag to 1."""
        # NOTE(review): the code reading collect_ref is outside this view;
        # presumably it makes the parser record symbol references - confirm.
        self.collect_ref = 1
    601 
    def stop_error(self):
        """Silence warning() and error() until start_error() is called."""
        self.no_error = 1
    604 
    def start_error(self):
        """Let warning() and error() report again."""
        self.no_error = 0
    607 
    def lineno(self):
        """Return the line number reported by the lexer."""
        return self.lexer.getlineno()
    610 
    611     def index_add(self, name, module, static, type, info=None, extra = None):
    612         if self.is_header == 1:
    613             self.index.add(name, module, module, static, type, self.lineno(),
    614                            info, extra, self.conditionals)
    615         else:
    616             self.index.add(name, None, module, static, type, self.lineno(),
    617                            info, extra, self.conditionals)
    618 
    619     def index_add_ref(self, name, module, static, type, info=None,
    620                       extra = None):
    621         if self.is_header == 1:
    622             self.index.add_ref(name, module, module, static, type,
    623                                self.lineno(), info, extra, self.conditionals)
    624         else:
    625             self.index.add_ref(name, None, module, static, type, self.lineno(),
    626                                info, extra, self.conditionals)
    627 
    628     def warning(self, msg):
    629         if self.no_error:
    630             return
    631         print(msg)
    632 
    633     def error(self, msg, token=-1):
    634         if self.no_error:
    635             return
    636 
    637         print("Parse Error: " + msg)
    638         if token != -1:
    639             print("Got token ", token)
    640         self.lexer.debug()
    641         sys.exit(1)
    642 
    643     def debug(self, msg, token=-1):
    644         print("Debug: " + msg)
    645         if token != -1:
    646             print("Got token ", token)
    647         self.lexer.debug()
    648 
    649     def parseTopComment(self, comment):
    650         res = {}
    651         lines = comment.split("\n")
    652         item = None
    653         for line in lines:
    654             while line != "" and (line[0] == ' ' or line[0] == '\t'):
    655                 line = line[1:]
    656             while line != "" and line[0] == '*':
    657                 line = line[1:]
    658             while line != "" and (line[0] == ' ' or line[0] == '\t'):
    659                 line = line[1:]
    660             try:
    661                 (it, line) = line.split(":", 1)
    662                 item = it
    663                 while line != "" and (line[0] == ' ' or line[0] == '\t'):
    664                     line = line[1:]
    665                 if item in res:
    666                     res[item] = res[item] + " " + line
    667                 else:
    668                     res[item] = line
    669             except:
    670                 if item != None:
    671                     if item in res:
    672                         res[item] = res[item] + " " + line
    673                     else:
    674                         res[item] = line
    675         self.index.info = res
    676 
    677     def parseComment(self, token):
    678         if self.top_comment == "":
    679             self.top_comment = token[1]
    680         if self.comment == None or token[1][0] == '*':
    681             self.comment = token[1];
    682         else:
    683             self.comment = self.comment + token[1]
    684         token = self.lexer.token()
    685 
    686         if self.comment.find("DOC_DISABLE") != -1:
    687             self.stop_error()
    688 
    689         if self.comment.find("DOC_ENABLE") != -1:
    690             self.start_error()
    691 
    692         return token
    693 
    694     #
    # Parse the comment block associated with a typedef
    696     #
    def parseTypeComment(self, name, quiet = 0):
        """Extract the description of a type from the pending comment.

        self.comment is expected to start with a '* <name>:' header line
        (optionally preceded by a lone '*'), followed by free text.

        name:  identifier of the type being documented.
        quiet: when true no warning is emitted; forced on for names
               starting with '__'.

        Returns the description string when the header matches.  When
        the comment is missing or misformatted the tuple ([], "") is
        returned instead; that asymmetry is kept on purpose so existing
        callers keep seeing the same values.
        """
        if name[0:2] == '__':
            quiet = 1

        args = []
        desc = ""

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for type %s" % (name))
            return((args, desc))
        # slice rather than index: an empty comment is reported as
        # misformatted instead of raising IndexError
        if self.comment[0:1] != '*':
            if not quiet:
                self.warning("Missing * in type comment for %s" % (name))
            return((args, desc))
        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        # the comment may have been a lone '*', leaving nothing to inspect
        if len(lines) == 0 or lines[0] != "* %s:" % (name):
            if not quiet:
                got = ""
                if len(lines) > 0:
                    got = lines[0]
                self.warning("Misformatted type comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, got))
            return((args, desc))
        del lines[0]
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]

        # drop the leading stars of every remaining line and glue the
        # pieces together with single spaces
        desc = " ".join([l.lstrip('*').strip() for l in lines]).strip()

        if not quiet:
            if desc == "":
                self.warning("Type comment for %s lack description of the type" % (name))

        return(desc)
    #
    # Parse a comment block associated with a macro
    #
    def parseMacroComment(self, name, quiet = 0):
        """Extract argument and macro descriptions from the pending comment.

        self.comment is expected to look like:

            * NAME:
            * @arg: description of arg
            *       possibly continued
            *
            * description of the macro

        name:  identifier of the macro being documented.
        quiet: when true no warning is emitted; forced on for names
               starting with '__'.

        Returns a tuple (args, desc) where args is a list of
        (argument name, argument description) pairs and desc is the
        macro description.  ([], "") is returned when the comment is
        missing or its header does not match.
        """
        if name[0:2] == '__':
            quiet = 1

        args = []
        desc = ""

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for macro %s" % (name))
            return((args, desc))
        # slice rather than index so an empty comment cannot raise
        if self.comment[0:1] != '*':
            if not quiet:
                self.warning("Missing * in macro comment for %s" % (name))
            return((args, desc))
        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        # the comment may have been a lone '*', leaving nothing to inspect
        if len(lines) == 0 or lines[0] != "* %s:" % (name):
            if not quiet:
                got = ""
                if len(lines) > 0:
                    got = lines[0]
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, got))
            return((args, desc))
        del lines[0]
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]

        # one iteration per '* @arg: text' entry
        while len(lines) > 0 and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                (arg, desc) = l.split(':', 1)
            except ValueError:
                # no ':' separator on the line
                if not quiet:
                    self.warning("Misformatted macro comment for %s" % (name))
                    self.warning("  problem with '%s'" % (lines[0]))
                del lines[0]
                continue
            desc = desc.strip()
            arg = arg.strip()
            del lines[0]
            # gather continuation lines; the argument may have been the
            # very last line of the comment
            if len(lines) > 0:
                l = lines[0].strip()
                while len(l) > 2 and l[0:3] != '* @':
                    desc = desc + ' ' + l.lstrip('*').strip()
                    del lines[0]
                    if len(lines) == 0:
                        break
                    l = lines[0]
            args.append((arg, desc))

        while len(lines) > 0 and lines[0] == '*':
            del lines[0]

        # whatever is left is the description of the macro itself
        desc = " ".join([l.lstrip('*').strip() for l in lines]).strip()

        if not quiet:
            if desc == "":
                self.warning("Macro comment for %s lack description of the macro" % (name))

        return((args, desc))
    809 
    #
    # Parse a comment block and merge the information found in the
    # parameter descriptions; finally return a block as complete
    # as possible
    #
    def mergeFunctionComment(self, name, description, quiet = 0):
        """Merge the pending comment into a parsed function description.

        name:        identifier of the function.
        description: tuple (ret, args) where ret[0] is the return type
                     and args is a list of (type, name, description)
                     tuples; matching entries of args are replaced in
                     place with the text found in '* @arg:' lines.
        quiet:       when true no warning is emitted; forced on for
                     'main' and for names starting with '__'.

        Returns ((return type, return description), args, description).
        The text starting at the first line beginning with 'return' or
        'Return' is taken as the return description.  When the comment
        is missing or its header does not match, both descriptions are
        empty and args is returned untouched.
        """
        if name == 'main':
            quiet = 1
        if name[0:2] == '__':
            quiet = 1

        (ret, args) = description
        desc = ""
        retdesc = ""

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for function %s" % (name))
            return(((ret[0], retdesc), args, desc))
        # slice rather than index so an empty comment cannot raise
        if self.comment[0:1] != '*':
            if not quiet:
                self.warning("Missing * in function comment for %s" % (name))
            return(((ret[0], retdesc), args, desc))
        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        # the comment may have been a lone '*', leaving nothing to inspect
        if len(lines) == 0 or lines[0] != "* %s:" % (name):
            if not quiet:
                got = ""
                if len(lines) > 0:
                    got = lines[0]
                self.warning("Misformatted function comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, got))
            return(((ret[0], retdesc), args, desc))
        del lines[0]
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        nbargs = len(args)

        # one iteration per '* @arg: text' entry
        while len(lines) > 0 and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                (arg, desc) = l.split(':', 1)
            except ValueError:
                # no ':' separator on the line
                if not quiet:
                    self.warning("Misformatted function comment for %s" % (name))
                    self.warning("  problem with '%s'" % (lines[0]))
                del lines[0]
                continue
            desc = desc.strip()
            arg = arg.strip()
            del lines[0]
            # gather continuation lines; the argument may have been the
            # very last line of the comment
            if len(lines) > 0:
                l = lines[0].strip()
                while len(l) > 2 and l[0:3] != '* @':
                    desc = desc + ' ' + l.lstrip('*').strip()
                    del lines[0]
                    if len(lines) == 0:
                        break
                    l = lines[0]
            # attach the text to the argument of the same name
            i = 0
            while i < nbargs:
                if args[i][1] == arg:
                    args[i] = (args[i][0], arg, desc)
                    break
                i = i + 1
            if i >= nbargs:
                if not quiet:
                    self.warning("Unable to find arg %s from function comment for %s" % (
                       arg, name))

        while len(lines) > 0 and lines[0] == '*':
            del lines[0]

        desc = ""
        while len(lines) > 0:
            l = lines[0].lstrip('*').strip()
            del lines[0]
            if l[0:6] == "return" or l[0:6] == "Return":
                # drop the leading 'Returns' word, everything up to the
                # end of the comment describes the return value
                parts = l.split(' ', 1)
                if len(parts) > 1:
                    retdesc = parts[1].strip()
                else:
                    retdesc = ""
                while len(lines) > 0:
                    retdesc = retdesc + " " + lines[0].lstrip('*').strip()
                    del lines[0]
            else:
                desc = desc + " " + l

        retdesc = retdesc.strip()
        desc = desc.strip()

        if not quiet:
            #
            # report missing comments
            #
            i = 0
            while i < nbargs:
                if args[i][2] is None and args[i][0] != "void" and \
                   args[i][1] is not None:
                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
                i = i + 1
            if retdesc == "" and ret[0] != "void":
                self.warning("Function comment for %s lacks description of return value" % (name))
            if desc == "":
                self.warning("Function comment for %s lacks description of the function" % (name))

        return(((ret[0], retdesc), args, desc))
    922 
    def parsePreproc(self, token):
        """Handle one preprocessor directive and return the next token.

        token is the 'preproc' token carrying the directive name.
        '#include' and '#define' register the included file or the macro
        through index_add.  The conditional directives maintain the
        self.defines and self.conditionals stacks.  In every other case
        the remaining 'preproc' tokens of the directive are skipped.
        """
        if debug:
            print("=> preproc ", token, self.lexer.tokens)
        directive = token[1]

        if directive == "#include":
            following = self.lexer.token()
            if following is None:
                return None
            if following[0] != 'preproc':
                return following
            self.index_add(following[1], self.filename, not self.is_header,
                           "include")
            return self.lexer.token()

        if directive == "#define":
            following = self.lexer.token()
            if following is None:
                return None
            if following[0] == 'preproc':
                # TODO macros with arguments
                macro = following[1]
                # skip the replacement text, i.e. every preproc token up
                # to the next directive
                following = self.lexer.token()
                while following is not None and following[0] == 'preproc' \
                      and following[1][0] != '#':
                    following = self.lexer.token()
                macro = macro.split('(')[0]
                info = self.parseMacroComment(macro, not self.is_header)
                self.index_add(macro, self.filename, not self.is_header,
                               "macro", info)
                return following
            # a '#define' not followed by a preproc token falls through
            # to the generic skipping code at the end

        #
        # Conditionals (#ifdef, #ifndef, #if, #else and #endif) are
        # tracked for both headers and mainline code.  Every tested
        # expression is pushed on self.defines, but only those containing
        # the string 'ENABLED' are recorded in self.conditionals: other
        # guards (e.g. '__XML_PARSER_H__') are not concerned with
        # enabling / disabling library functions.
        #
        # self.conditionals is appended to and patched in place but
        # rebound to a fresh list on '#endif'; keep it that way in case
        # other code holds a reference to the list.
        #
        if directive == "#ifdef" or directive == "#ifndef":
            symbol = self.lexer.tokens[0][1]
            if directive == "#ifdef":
                template = "defined(%s)"
            else:
                template = "!defined(%s)"
            try:
                self.defines.append(symbol)
                if symbol.find('ENABLED') != -1:
                    self.conditionals.append(template % symbol)
            except:
                pass
        elif directive == "#if":
            # rebuild the expression from the tokens still pending
            expr = ""
            for pending in self.lexer.tokens:
                if expr != "":
                    expr = expr + " "
                expr = expr + pending[1]
            try:
                self.defines.append(expr)
                if expr.find('ENABLED') != -1:
                    self.conditionals.append(expr)
            except:
                pass
        elif directive == "#else":
            if self.conditionals != [] and \
               self.defines[-1].find('ENABLED') != -1:
                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
        elif directive == "#endif":
            if self.conditionals != [] and \
               self.defines[-1].find('ENABLED') != -1:
                self.conditionals = self.conditionals[:-1]
            self.defines = self.defines[:-1]

        # swallow whatever is left of the directive
        following = self.lexer.token()
        while following is not None and following[0] == 'preproc' and \
              following[1][0] != '#':
            following = self.lexer.token()
        return following
   1017 
    #
    # Token acquisition on top of the lexer; it handles preprocessor
    # directives and comments internally since they are logically not
    # part of the program structure.
    #
   1023     def token(self):
   1024         global ignored_words
   1025 
   1026         token = self.lexer.token()
   1027         while token != None:
   1028             if token[0] == 'comment':
   1029                 token = self.parseComment(token)
   1030                 continue
   1031             elif token[0] == 'preproc':
   1032                 token = self.parsePreproc(token)
   1033                 continue
   1034             elif token[0] == "name" and token[1] == "__const":
   1035                 token = ("name", "const")
   1036                 return token
   1037             elif token[0] == "name" and token[1] == "__attribute":
   1038                 token = self.lexer.token()
   1039                 while token != None and token[1] != ";":
   1040                     token = self.lexer.token()
   1041                 return token
   1042             elif token[0] == "name" and token[1] in ignored_words:
   1043                 (n, info) = ignored_words[token[1]]
   1044                 i = 0
   1045                 while i < n:
   1046                     token = self.lexer.token()
   1047                     i = i + 1
   1048                 token = self.lexer.token()
   1049                 continue
   1050             else:
   1051                 if debug:
   1052                     print("=> ", token)
   1053                 return token
   1054         return None
   1055 
   1056      #
   1057      # Parse a typedef, it records the type and its name.
   1058      #
   1059     def parseTypedef(self, token):
   1060         if token == None:
   1061             return None
   1062         token = self.parseType(token)
   1063         if token == None:
   1064             self.error("parsing typedef")
   1065             return None
   1066         base_type = self.type
   1067         type = base_type
   1068          #self.debug("end typedef type", token)
   1069         while token != None:
   1070             if token[0] == "name":
   1071                 name = token[1]
   1072                 signature = self.signature
   1073                 if signature != None:
   1074                     type = type.split('(')[0]
   1075                     d = self.mergeFunctionComment(name,
   1076                             ((type, None), signature), 1)
   1077                     self.index_add(name, self.filename, not self.is_header,
   1078                                     "functype", d)
   1079                 else:
   1080                     if base_type == "struct":
   1081                         self.index_add(name, self.filename, not self.is_header,
   1082                                         "struct", type)
   1083                         base_type = "struct " + name
   1084                     else:
   1085                         # TODO report missing or misformatted comments
   1086                         info = self.parseTypeComment(name, 1)
   1087                         self.index_add(name, self.filename, not self.is_header,
   1088                                     "typedef", type, info)
   1089                 token = self.token()
   1090             else:
   1091                 self.error("parsing typedef: expecting a name")
   1092                 return token
   1093              #self.debug("end typedef", token)
   1094             if token != None and token[0] == 'sep' and token[1] == ',':
   1095                 type = base_type
   1096                 token = self.token()
   1097                 while token != None and token[0] == "op":
   1098                     type = type + token[1]
   1099                     token = self.token()
   1100             elif token != None and token[0] == 'sep' and token[1] == ';':
   1101                 break;
   1102             elif token != None and token[0] == 'name':
   1103                 type = base_type
   1104                 continue;
   1105             else:
   1106                 self.error("parsing typedef: expecting ';'", token)
   1107                 return token
   1108         token = self.token()
   1109         return token
   1110 
    #
    # Parse a C code block, used for functions; it parses up to and
    # including the balancing }
    #
   1115     def parseBlock(self, token):
   1116         while token != None:
   1117             if token[0] == "sep" and token[1] == "{":
   1118                 token = self.token()
   1119                 token = self.parseBlock(token)
   1120             elif token[0] == "sep" and token[1] == "}":
   1121                 self.comment = None
   1122                 token = self.token()
   1123                 return token
   1124             else:
   1125                 if self.collect_ref == 1:
   1126                     oldtok = token
   1127                     token = self.token()
   1128                     if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
   1129                         if token[0] == "sep" and token[1] == "(":
   1130                             self.index_add_ref(oldtok[1], self.filename,
   1131                                                 0, "function")
   1132                             token = self.token()
   1133                         elif token[0] == "name":
   1134                             token = self.token()
   1135                             if token[0] == "sep" and (token[1] == ";" or
   1136                                token[1] == "," or token[1] == "="):
   1137                                 self.index_add_ref(oldtok[1], self.filename,
   1138                                                     0, "type")
   1139                     elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
   1140                         self.index_add_ref(oldtok[1], self.filename,
   1141                                             0, "typedef")
   1142                     elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
   1143                         self.index_add_ref(oldtok[1], self.filename,
   1144                                             0, "typedef")
   1145 
   1146                 else:
   1147                     token = self.token()
   1148         return token
   1149 
   1150      #
   1151      # Parse a C struct definition till the balancing }
   1152      #
   1153     def parseStruct(self, token):
   1154         fields = []
   1155          #self.debug("start parseStruct", token)
   1156         while token != None:
   1157             if token[0] == "sep" and token[1] == "{":
   1158                 token = self.token()
   1159                 token = self.parseTypeBlock(token)
   1160             elif token[0] == "sep" and token[1] == "}":
   1161                 self.struct_fields = fields
   1162                  #self.debug("end parseStruct", token)
   1163                  #print fields
   1164                 token = self.token()
   1165                 return token
   1166             else:
   1167                 base_type = self.type
   1168                  #self.debug("before parseType", token)
   1169                 token = self.parseType(token)
   1170                  #self.debug("after parseType", token)
   1171                 if token != None and token[0] == "name":
   1172                     fname = token[1]
   1173                     token = self.token()
   1174                     if token[0] == "sep" and token[1] == ";":
   1175                         self.comment = None
   1176                         token = self.token()
   1177                         fields.append((self.type, fname, self.comment))
   1178                         self.comment = None
   1179                     else:
   1180                         self.error("parseStruct: expecting ;", token)
   1181                 elif token != None and token[0] == "sep" and token[1] == "{":
   1182                     token = self.token()
   1183                     token = self.parseTypeBlock(token)
   1184                     if token != None and token[0] == "name":
   1185                         token = self.token()
   1186                     if token != None and token[0] == "sep" and token[1] == ";":
   1187                         token = self.token()
   1188                     else:
   1189                         self.error("parseStruct: expecting ;", token)
   1190                 else:
   1191                     self.error("parseStruct: name", token)
   1192                     token = self.token()
   1193                 self.type = base_type;
   1194         self.struct_fields = fields
   1195          #self.debug("end parseStruct", token)
   1196          #print fields
   1197         return token
   1198 
   1199      #
   1200      # Parse a C enum block, parse till the balancing }
   1201      #
   1202     def parseEnumBlock(self, token):
   1203         self.enums = []
   1204         name = None
   1205         self.comment = None
   1206         comment = ""
   1207         value = "0"
   1208         while token != None:
   1209             if token[0] == "sep" and token[1] == "{":
   1210                 token = self.token()
   1211                 token = self.parseTypeBlock(token)
   1212             elif token[0] == "sep" and token[1] == "}":
   1213                 if name != None:
   1214                     if self.comment != None:
   1215                         comment = self.comment
   1216                         self.comment = None
   1217                     self.enums.append((name, value, comment))
   1218                 token = self.token()
   1219                 return token
   1220             elif token[0] == "name":
   1221                     if name != None:
   1222                         if self.comment != None:
   1223                             comment = self.comment.strip()
   1224                             self.comment = None
   1225                         self.enums.append((name, value, comment))
   1226                     name = token[1]
   1227                     comment = ""
   1228                     token = self.token()
   1229                     if token[0] == "op" and token[1][0] == "=":
   1230                         value = ""
   1231                         if len(token[1]) > 1:
   1232                             value = token[1][1:]
   1233                         token = self.token()
   1234                         while token[0] != "sep" or (token[1] != ',' and
   1235                               token[1] != '}'):
   1236                             value = value + token[1]
   1237                             token = self.token()
   1238                     else:
   1239                         try:
   1240                             value = "%d" % (int(value) + 1)
   1241                         except:
   1242                             self.warning("Failed to compute value of enum %s" % (name))
   1243                             value=""
   1244                     if token[0] == "sep" and token[1] == ",":
   1245                         token = self.token()
   1246             else:
   1247                 token = self.token()
   1248         return token
   1249 
    #
    # Parse a C definition block, used for structs; it parses up to
    # the balancing }
    #
   1254     def parseTypeBlock(self, token):
   1255         while token != None:
   1256             if token[0] == "sep" and token[1] == "{":
   1257                 token = self.token()
   1258                 token = self.parseTypeBlock(token)
   1259             elif token[0] == "sep" and token[1] == "}":
   1260                 token = self.token()
   1261                 return token
   1262             else:
   1263                 token = self.token()
   1264         return token
   1265 
   1266      #
   1267      # Parse a type: the fact that the type name can either occur after
   1268      #    the definition or within the definition makes it a little harder
   1269      #    if inside, the name token is pushed back before returning
   1270      #
   1271     def parseType(self, token):
   1272         self.type = ""
   1273         self.struct_fields = []
   1274         self.signature = None
   1275         if token == None:
   1276             return token
   1277 
   1278         while token[0] == "name" and (
   1279               token[1] == "const" or \
   1280               token[1] == "unsigned" or \
   1281               token[1] == "signed"):
   1282             if self.type == "":
   1283                 self.type = token[1]
   1284             else:
   1285                 self.type = self.type + " " + token[1]
   1286             token = self.token()
   1287 
   1288         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
   1289             if self.type == "":
   1290                 self.type = token[1]
   1291             else:
   1292                 self.type = self.type + " " + token[1]
   1293             if token[0] == "name" and token[1] == "int":
   1294                 if self.type == "":
   1295                     self.type = tmp[1]
   1296                 else:
   1297                     self.type = self.type + " " + tmp[1]
   1298 
   1299         elif token[0] == "name" and token[1] == "struct":
   1300             if self.type == "":
   1301                 self.type = token[1]
   1302             else:
   1303                 self.type = self.type + " " + token[1]
   1304             token = self.token()
   1305             nametok = None
   1306             if token[0] == "name":
   1307                 nametok = token
   1308                 token = self.token()
   1309             if token != None and token[0] == "sep" and token[1] == "{":
   1310                 token = self.token()
   1311                 token = self.parseStruct(token)
   1312             elif token != None and token[0] == "op" and token[1] == "*":
   1313                 self.type = self.type + " " + nametok[1] + " *"
   1314                 token = self.token()
   1315                 while token != None and token[0] == "op" and token[1] == "*":
   1316                     self.type = self.type + " *"
   1317                     token = self.token()
   1318                 if token[0] == "name":
   1319                     nametok = token
   1320                     token = self.token()
   1321                 else:
   1322                     self.error("struct : expecting name", token)
   1323                     return token
   1324             elif token != None and token[0] == "name" and nametok != None:
   1325                 self.type = self.type + " " + nametok[1]
   1326                 return token
   1327 
   1328             if nametok != None:
   1329                 self.lexer.push(token)
   1330                 token = nametok
   1331             return token
   1332 
   1333         elif token[0] == "name" and token[1] == "enum":
   1334             if self.type == "":
   1335                 self.type = token[1]
   1336             else:
   1337                 self.type = self.type + " " + token[1]
   1338             self.enums = []
   1339             token = self.token()
   1340             if token != None and token[0] == "sep" and token[1] == "{":
   1341                 token = self.token()
   1342                 token = self.parseEnumBlock(token)
   1343             else:
   1344                 self.error("parsing enum: expecting '{'", token)
   1345             enum_type = None
   1346             if token != None and token[0] != "name":
   1347                 self.lexer.push(token)
   1348                 token = ("name", "enum")
   1349             else:
   1350                 enum_type = token[1]
   1351             for enum in self.enums:
   1352                 self.index_add(enum[0], self.filename,
   1353                                not self.is_header, "enum",
   1354                                (enum[1], enum[2], enum_type))
   1355             return token
   1356 
   1357         elif token[0] == "name":
   1358             if self.type == "":
   1359                 self.type = token[1]
   1360             else:
   1361                 self.type = self.type + " " + token[1]
   1362         else:
   1363             self.error("parsing type %s: expecting a name" % (self.type),
   1364                        token)
   1365             return token
   1366         token = self.token()
   1367         while token != None and (token[0] == "op" or
   1368               token[0] == "name" and token[1] == "const"):
   1369             self.type = self.type + " " + token[1]
   1370             token = self.token()
   1371 
   1372          #
   1373          # if there is a parenthesis here, this means a function type
   1374          #
   1375         if token != None and token[0] == "sep" and token[1] == '(':
   1376             self.type = self.type + token[1]
   1377             token = self.token()
   1378             while token != None and token[0] == "op" and token[1] == '*':
   1379                 self.type = self.type + token[1]
   1380                 token = self.token()
   1381             if token == None or token[0] != "name" :
   1382                 self.error("parsing function type, name expected", token);
   1383                 return token
   1384             self.type = self.type + token[1]
   1385             nametok = token
   1386             token = self.token()
   1387             if token != None and token[0] == "sep" and token[1] == ')':
   1388                 self.type = self.type + token[1]
   1389                 token = self.token()
   1390                 if token != None and token[0] == "sep" and token[1] == '(':
   1391                     token = self.token()
   1392                     type = self.type;
   1393                     token = self.parseSignature(token);
   1394                     self.type = type;
   1395                 else:
   1396                     self.error("parsing function type, '(' expected", token);
   1397                     return token
   1398             else:
   1399                 self.error("parsing function type, ')' expected", token);
   1400                 return token
   1401             self.lexer.push(token)
   1402             token = nametok
   1403             return token
   1404 
   1405          #
   1406          # do some lookahead for arrays
   1407          #
   1408         if token != None and token[0] == "name":
   1409             nametok = token
   1410             token = self.token()
   1411             if token != None and token[0] == "sep" and token[1] == '[':
   1412                 self.type = self.type + nametok[1]
   1413                 while token != None and token[0] == "sep" and token[1] == '[':
   1414                     self.type = self.type + token[1]
   1415                     token = self.token()
   1416                     while token != None and token[0] != 'sep' and \
   1417                           token[1] != ']' and token[1] != ';':
   1418                         self.type = self.type + token[1]
   1419                         token = self.token()
   1420                 if token != None and token[0] == 'sep' and token[1] == ']':
   1421                     self.type = self.type + token[1]
   1422                     token = self.token()
   1423                 else:
   1424                     self.error("parsing array type, ']' expected", token);
   1425                     return token
   1426             elif token != None and token[0] == "sep" and token[1] == ':':
   1427                  # remove :12 in case it's a limited int size
   1428                 token = self.token()
   1429                 token = self.token()
   1430             self.lexer.push(token)
   1431             token = nametok
   1432 
   1433         return token
   1434 
   1435      #
   1436      # Parse a signature: '(' has been parsed and we scan the type definition
   1437      #    up to the ')' included
   1438     def parseSignature(self, token):
   1439         signature = []
   1440         if token != None and token[0] == "sep" and token[1] == ')':
   1441             self.signature = []
   1442             token = self.token()
   1443             return token
   1444         while token != None:
   1445             token = self.parseType(token)
   1446             if token != None and token[0] == "name":
   1447                 signature.append((self.type, token[1], None))
   1448                 token = self.token()
   1449             elif token != None and token[0] == "sep" and token[1] == ',':
   1450                 token = self.token()
   1451                 continue
   1452             elif token != None and token[0] == "sep" and token[1] == ')':
   1453                  # only the type was provided
   1454                 if self.type == "...":
   1455                     signature.append((self.type, "...", None))
   1456                 else:
   1457                     signature.append((self.type, None, None))
   1458             if token != None and token[0] == "sep":
   1459                 if token[1] == ',':
   1460                     token = self.token()
   1461                     continue
   1462                 elif token[1] == ')':
   1463                     token = self.token()
   1464                     break
   1465         self.signature = signature
   1466         return token
   1467 
   1468      #
   1469      # Parse a global definition, be it a type, variable or function
   1470      # the extern "C" blocks are a bit nasty and require it to recurse.
   1471      #
   1472     def parseGlobal(self, token):
   1473         static = 0
   1474         if token[1] == 'extern':
   1475             token = self.token()
   1476             if token == None:
   1477                 return token
   1478             if token[0] == 'string':
   1479                 if token[1] == 'C':
   1480                     token = self.token()
   1481                     if token == None:
   1482                         return token
   1483                     if token[0] == 'sep' and token[1] == "{":
   1484                         token = self.token()
   1485 #                         print 'Entering extern "C line ', self.lineno()
   1486                         while token != None and (token[0] != 'sep' or
   1487                               token[1] != "}"):
   1488                             if token[0] == 'name':
   1489                                 token = self.parseGlobal(token)
   1490                             else:
   1491                                 self.error(
   1492                                  "token %s %s unexpected at the top level" % (
   1493                                         token[0], token[1]))
   1494                                 token = self.parseGlobal(token)
   1495 #                         print 'Exiting extern "C" line', self.lineno()
   1496                         token = self.token()
   1497                         return token
   1498                 else:
   1499                     return token
   1500         elif token[1] == 'static':
   1501             static = 1
   1502             token = self.token()
   1503             if token == None or  token[0] != 'name':
   1504                 return token
   1505 
   1506         if token[1] == 'typedef':
   1507             token = self.token()
   1508             return self.parseTypedef(token)
   1509         else:
   1510             token = self.parseType(token)
   1511             type_orig = self.type
   1512         if token == None or token[0] != "name":
   1513             return token
   1514         type = type_orig
   1515         self.name = token[1]
   1516         token = self.token()
   1517         while token != None and (token[0] == "sep" or token[0] == "op"):
   1518             if token[0] == "sep":
   1519                 if token[1] == "[":
   1520                     type = type + token[1]
   1521                     token = self.token()
   1522                     while token != None and (token[0] != "sep" or \
   1523                           token[1] != ";"):
   1524                         type = type + token[1]
   1525                         token = self.token()
   1526 
   1527             if token != None and token[0] == "op" and token[1] == "=":
   1528                  #
   1529                  # Skip the initialization of the variable
   1530                  #
   1531                 token = self.token()
   1532                 if token[0] == 'sep' and token[1] == '{':
   1533                     token = self.token()
   1534                     token = self.parseBlock(token)
   1535                 else:
   1536                     self.comment = None
   1537                     while token != None and (token[0] != "sep" or \
   1538                           (token[1] != ';' and token[1] != ',')):
   1539                             token = self.token()
   1540                 self.comment = None
   1541                 if token == None or token[0] != "sep" or (token[1] != ';' and
   1542                    token[1] != ','):
   1543                     self.error("missing ';' or ',' after value")
   1544 
   1545             if token != None and token[0] == "sep":
   1546                 if token[1] == ";":
   1547                     self.comment = None
   1548                     token = self.token()
   1549                     if type == "struct":
   1550                         self.index_add(self.name, self.filename,
   1551                              not self.is_header, "struct", self.struct_fields)
   1552                     else:
   1553                         self.index_add(self.name, self.filename,
   1554                              not self.is_header, "variable", type)
   1555                     break
   1556                 elif token[1] == "(":
   1557                     token = self.token()
   1558                     token = self.parseSignature(token)
   1559                     if token == None:
   1560                         return None
   1561                     if token[0] == "sep" and token[1] == ";":
   1562                         d = self.mergeFunctionComment(self.name,
   1563                                 ((type, None), self.signature), 1)
   1564                         self.index_add(self.name, self.filename, static,
   1565                                         "function", d)
   1566                         token = self.token()
   1567                     elif token[0] == "sep" and token[1] == "{":
   1568                         d = self.mergeFunctionComment(self.name,
   1569                                 ((type, None), self.signature), static)
   1570                         self.index_add(self.name, self.filename, static,
   1571                                         "function", d)
   1572                         token = self.token()
   1573                         token = self.parseBlock(token);
   1574                 elif token[1] == ',':
   1575                     self.comment = None
   1576                     self.index_add(self.name, self.filename, static,
   1577                                     "variable", type)
   1578                     type = type_orig
   1579                     token = self.token()
   1580                     while token != None and token[0] == "sep":
   1581                         type = type + token[1]
   1582                         token = self.token()
   1583                     if token != None and token[0] == "name":
   1584                         self.name = token[1]
   1585                         token = self.token()
   1586                 else:
   1587                     break
   1588 
   1589         return token
   1590 
   1591     def parse(self):
   1592         self.warning("Parsing %s" % (self.filename))
   1593         token = self.token()
   1594         while token != None:
   1595             if token[0] == 'name':
   1596                 token = self.parseGlobal(token)
   1597             else:
   1598                 self.error("token %s %s unexpected at the top level" % (
   1599                        token[0], token[1]))
   1600                 token = self.parseGlobal(token)
   1601                 return
   1602         self.parseTopComment(self.top_comment)
   1603         return self.index
   1604 
   1605 
   1606 class docBuilder:
   1607     """A documentation builder"""
   1608     def __init__(self, name, directories=['.'], excludes=[]):
   1609         self.name = name
   1610         self.directories = directories
   1611         self.excludes = excludes + list(ignored_files.keys())
   1612         self.modules = {}
   1613         self.headers = {}
   1614         self.idx = index()
   1615         self.xref = {}
   1616         self.index = {}
   1617         if name == 'libxml2':
   1618             self.basename = 'libxml'
   1619         else:
   1620             self.basename = name
   1621 
   1622     def indexString(self, id, str):
   1623         if str == None:
   1624             return
   1625         str = str.replace("'", ' ')
   1626         str = str.replace('"', ' ')
   1627         str = str.replace("/", ' ')
   1628         str = str.replace('*', ' ')
   1629         str = str.replace("[", ' ')
   1630         str = str.replace("]", ' ')
   1631         str = str.replace("(", ' ')
   1632         str = str.replace(")", ' ')
   1633         str = str.replace("<", ' ')
   1634         str = str.replace('>', ' ')
   1635         str = str.replace("&", ' ')
   1636         str = str.replace('#', ' ')
   1637         str = str.replace(",", ' ')
   1638         str = str.replace('.', ' ')
   1639         str = str.replace(';', ' ')
   1640         tokens = str.split()
   1641         for token in tokens:
   1642             try:
   1643                 c = token[0]
   1644                 if string.ascii_letters.find(c) < 0:
   1645                     pass
   1646                 elif len(token) < 3:
   1647                     pass
   1648                 else:
   1649                     lower = token.lower()
   1650                     # TODO: generalize this a bit
   1651                     if lower == 'and' or lower == 'the':
   1652                         pass
   1653                     elif token in self.xref:
   1654                         self.xref[token].append(id)
   1655                     else:
   1656                         self.xref[token] = [id]
   1657             except:
   1658                 pass
   1659 
   1660     def analyze(self):
   1661         print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys()))))
   1662         self.idx.analyze()
   1663 
   1664     def scanHeaders(self):
   1665         for header in list(self.headers.keys()):
   1666             parser = CParser(header)
   1667             idx = parser.parse()
   1668             self.headers[header] = idx;
   1669             self.idx.merge(idx)
   1670 
   1671     def scanModules(self):
   1672         for module in list(self.modules.keys()):
   1673             parser = CParser(module)
   1674             idx = parser.parse()
   1675             # idx.analyze()
   1676             self.modules[module] = idx
   1677             self.idx.merge_public(idx)
   1678 
   1679     def scan(self):
   1680         for directory in self.directories:
   1681             files = glob.glob(directory + "/*.c")
   1682             for file in files:
   1683                 skip = 0
   1684                 for excl in self.excludes:
   1685                     if file.find(excl) != -1:
   1686                         print("Skipping %s" % file)
   1687                         skip = 1
   1688                         break
   1689                 if skip == 0:
   1690                     self.modules[file] = None;
   1691             files = glob.glob(directory + "/*.h")
   1692             for file in files:
   1693                 skip = 0
   1694                 for excl in self.excludes:
   1695                     if file.find(excl) != -1:
   1696                         print("Skipping %s" % file)
   1697                         skip = 1
   1698                         break
   1699                 if skip == 0:
   1700                     self.headers[file] = None;
   1701         self.scanHeaders()
   1702         self.scanModules()
   1703 
   1704     def modulename_file(self, file):
   1705         module = os.path.basename(file)
   1706         if module[-2:] == '.h':
   1707             module = module[:-2]
   1708         elif module[-2:] == '.c':
   1709             module = module[:-2]
   1710         return module
   1711 
   1712     def serialize_enum(self, output, name):
   1713         id = self.idx.enums[name]
   1714         output.write("    <enum name='%s' file='%s'" % (name,
   1715                      self.modulename_file(id.header)))
   1716         if id.info != None:
   1717             info = id.info
   1718             if info[0] != None and info[0] != '':
   1719                 try:
   1720                     val = eval(info[0])
   1721                 except:
   1722                     val = info[0]
   1723                 output.write(" value='%s'" % (val));
   1724             if info[2] != None and info[2] != '':
   1725                 output.write(" type='%s'" % info[2]);
   1726             if info[1] != None and info[1] != '':
   1727                 output.write(" info='%s'" % escape(info[1]));
   1728         output.write("/>\n")
   1729 
   1730     def serialize_macro(self, output, name):
   1731         id = self.idx.macros[name]
   1732         output.write("    <macro name='%s' file='%s'>\n" % (name,
   1733                      self.modulename_file(id.header)))
   1734         if id.info != None:
   1735             try:
   1736                 (args, desc) = id.info
   1737                 if desc != None and desc != "":
   1738                     output.write("      <info>%s</info>\n" % (escape(desc)))
   1739                     self.indexString(name, desc)
   1740                 for arg in args:
   1741                     (name, desc) = arg
   1742                     if desc != None and desc != "":
   1743                         output.write("      <arg name='%s' info='%s'/>\n" % (
   1744                                      name, escape(desc)))
   1745                         self.indexString(name, desc)
   1746                     else:
   1747                         output.write("      <arg name='%s'/>\n" % (name))
   1748             except:
   1749                 pass
   1750         output.write("    </macro>\n")
   1751 
   1752     def serialize_typedef(self, output, name):
   1753         id = self.idx.typedefs[name]
   1754         if id.info[0:7] == 'struct ':
   1755             output.write("    <struct name='%s' file='%s' type='%s'" % (
   1756                      name, self.modulename_file(id.header), id.info))
   1757             name = id.info[7:]
   1758             if name in self.idx.structs and ( \
   1759                type(self.idx.structs[name].info) == type(()) or
   1760                 type(self.idx.structs[name].info) == type([])):
   1761                 output.write(">\n");
   1762                 try:
   1763                     for field in self.idx.structs[name].info:
   1764                         desc = field[2]
   1765                         self.indexString(name, desc)
   1766                         if desc == None:
   1767                             desc = ''
   1768                         else:
   1769                             desc = escape(desc)
   1770                         output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
   1771                 except:
   1772                     print("Failed to serialize struct %s" % (name))
   1773                 output.write("    </struct>\n")
   1774             else:
   1775                 output.write("/>\n");
   1776         else :
   1777             output.write("    <typedef name='%s' file='%s' type='%s'" % (
   1778                          name, self.modulename_file(id.header), id.info))
   1779             try:
   1780                 desc = id.extra
   1781                 if desc != None and desc != "":
   1782                     output.write(">\n      <info>%s</info>\n" % (escape(desc)))
   1783                     output.write("    </typedef>\n")
   1784                 else:
   1785                     output.write("/>\n")
   1786             except:
   1787                 output.write("/>\n")
   1788 
   1789     def serialize_variable(self, output, name):
   1790         id = self.idx.variables[name]
   1791         if id.info != None:
   1792             output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
   1793                     name, self.modulename_file(id.header), id.info))
   1794         else:
   1795             output.write("    <variable name='%s' file='%s'/>\n" % (
   1796                     name, self.modulename_file(id.header)))
   1797 
   1798     def serialize_function(self, output, name):
   1799         id = self.idx.functions[name]
   1800         if name == debugsym:
   1801             print("=>", id)
   1802 
   1803         output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
   1804                      name, self.modulename_file(id.header),
   1805                      self.modulename_file(id.module)))
   1806         #
   1807         # Processing of conditionals modified by Bill 1/1/05
   1808         #
   1809         if id.conditionals != None:
   1810             apstr = ""
   1811             for cond in id.conditionals:
   1812                 if apstr != "":
   1813                     apstr = apstr + " &amp;&amp; "
   1814                 apstr = apstr + cond
   1815             output.write("      <cond>%s</cond>\n"% (apstr));
   1816         try:
   1817             (ret, params, desc) = id.info
   1818             if (desc == None or desc == '') and \
   1819                name[0:9] != "xmlThrDef" and name != "xmlDllMain":
   1820                 print("%s %s from %s has no description" % (id.type, name,
   1821                        self.modulename_file(id.module)))
   1822 
   1823             output.write("      <info>%s</info>\n" % (escape(desc)))
   1824             self.indexString(name, desc)
   1825             if ret[0] != None:
   1826                 if ret[0] == "void":
   1827                     output.write("      <return type='void'/>\n")
   1828                 else:
   1829                     output.write("      <return type='%s' info='%s'/>\n" % (
   1830                              ret[0], escape(ret[1])))
   1831                     self.indexString(name, ret[1])
   1832             for param in params:
   1833                 if param[0] == 'void':
   1834                     continue
   1835                 if param[2] == None:
   1836                     output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
   1837                 else:
   1838                     output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
   1839                     self.indexString(name, param[2])
   1840         except:
   1841             print("Failed to save function %s info: " % name, repr(id.info))
   1842         output.write("    </%s>\n" % (id.type))
   1843 
   1844     def serialize_exports(self, output, file):
   1845         module = self.modulename_file(file)
   1846         output.write("    <file name='%s'>\n" % (module))
   1847         dict = self.headers[file]
   1848         if dict.info != None:
   1849             for data in ('Summary', 'Description', 'Author'):
   1850                 try:
   1851                     output.write("     <%s>%s</%s>\n" % (
   1852                                  data.lower(),
   1853                                  escape(dict.info[data]),
   1854                                  data.lower()))
   1855                 except:
   1856                     print("Header %s lacks a %s description" % (module, data))
   1857             if 'Description' in dict.info:
   1858                 desc = dict.info['Description']
   1859                 if desc.find("DEPRECATED") != -1:
   1860                     output.write("     <deprecated/>\n")
   1861 
   1862         ids = list(dict.macros.keys())
   1863         ids.sort()
   1864         for id in uniq(ids):
   1865             # Macros are sometime used to masquerade other types.
   1866             if id in dict.functions:
   1867                 continue
   1868             if id in dict.variables:
   1869                 continue
   1870             if id in dict.typedefs:
   1871                 continue
   1872             if id in dict.structs:
   1873                 continue
   1874             if id in dict.enums:
   1875                 continue
   1876             output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
   1877         ids = list(dict.enums.keys())
   1878         ids.sort()
   1879         for id in uniq(ids):
   1880             output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
   1881         ids = list(dict.typedefs.keys())
   1882         ids.sort()
   1883         for id in uniq(ids):
   1884             output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
   1885         ids = list(dict.structs.keys())
   1886         ids.sort()
   1887         for id in uniq(ids):
   1888             output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
   1889         ids = list(dict.variables.keys())
   1890         ids.sort()
   1891         for id in uniq(ids):
   1892             output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
   1893         ids = list(dict.functions.keys())
   1894         ids.sort()
   1895         for id in uniq(ids):
   1896             output.write("     <exports symbol='%s' type='function'/>\n" % (id))
   1897         output.write("    </file>\n")
   1898 
   1899     def serialize_xrefs_files(self, output):
   1900         headers = list(self.headers.keys())
   1901         headers.sort()
   1902         for file in headers:
   1903             module = self.modulename_file(file)
   1904             output.write("    <file name='%s'>\n" % (module))
   1905             dict = self.headers[file]
   1906             ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \
   1907                   list(dict.macros.keys()) + list(dict.typedefs.keys()) + \
   1908                   list(dict.structs.keys()) + list(dict.enums.keys()))
   1909             ids.sort()
   1910             for id in ids:
   1911                 output.write("      <ref name='%s'/>\n" % (id))
   1912             output.write("    </file>\n")
   1913         pass
   1914 
   1915     def serialize_xrefs_functions(self, output):
   1916         funcs = {}
   1917         for name in list(self.idx.functions.keys()):
   1918             id = self.idx.functions[name]
   1919             try:
   1920                 (ret, params, desc) = id.info
   1921                 for param in params:
   1922                     if param[0] == 'void':
   1923                         continue
   1924                     if param[0] in funcs:
   1925                         funcs[param[0]].append(name)
   1926                     else:
   1927                         funcs[param[0]] = [name]
   1928             except:
   1929                 pass
   1930         typ = list(funcs.keys())
   1931         typ.sort()
   1932         for type in typ:
   1933             if type == '' or type == 'void' or type == "int" or \
   1934                type == "char *" or type == "const char *" :
   1935                 continue
   1936             output.write("    <type name='%s'>\n" % (type))
   1937             ids = funcs[type]
   1938             ids.sort()
   1939             pid = ''        # not sure why we have dups, but get rid of them!
   1940             for id in ids:
   1941                 if id != pid:
   1942                     output.write("      <ref name='%s'/>\n" % (id))
   1943                     pid = id
   1944             output.write("    </type>\n")
   1945 
   1946     def serialize_xrefs_constructors(self, output):
   1947         funcs = {}
   1948         for name in list(self.idx.functions.keys()):
   1949             id = self.idx.functions[name]
   1950             try:
   1951                 (ret, params, desc) = id.info
   1952                 if ret[0] == "void":
   1953                     continue
   1954                 if ret[0] in funcs:
   1955                     funcs[ret[0]].append(name)
   1956                 else:
   1957                     funcs[ret[0]] = [name]
   1958             except:
   1959                 pass
   1960         typ = list(funcs.keys())
   1961         typ.sort()
   1962         for type in typ:
   1963             if type == '' or type == 'void' or type == "int" or \
   1964                type == "char *" or type == "const char *" :
   1965                 continue
   1966             output.write("    <type name='%s'>\n" % (type))
   1967             ids = funcs[type]
   1968             ids.sort()
   1969             for id in ids:
   1970                 output.write("      <ref name='%s'/>\n" % (id))
   1971             output.write("    </type>\n")
   1972 
   1973     def serialize_xrefs_alpha(self, output):
   1974         letter = None
   1975         ids = list(self.idx.identifiers.keys())
   1976         ids.sort()
   1977         for id in ids:
   1978             if id[0] != letter:
   1979                 if letter != None:
   1980                     output.write("    </letter>\n")
   1981                 letter = id[0]
   1982                 output.write("    <letter name='%s'>\n" % (letter))
   1983             output.write("      <ref name='%s'/>\n" % (id))
   1984         if letter != None:
   1985             output.write("    </letter>\n")
   1986 
   1987     def serialize_xrefs_references(self, output):
   1988         typ = list(self.idx.identifiers.keys())
   1989         typ.sort()
   1990         for id in typ:
   1991             idf = self.idx.identifiers[id]
   1992             module = idf.header
   1993             output.write("    <reference name='%s' href='%s'/>\n" % (id,
   1994                          'html/' + self.basename + '-' +
   1995                          self.modulename_file(module) + '.html#' +
   1996                          id))
   1997 
   1998     def serialize_xrefs_index(self, output):
   1999         index = self.xref
   2000         typ = list(index.keys())
   2001         typ.sort()
   2002         letter = None
   2003         count = 0
   2004         chunk = 0
   2005         chunks = []
   2006         for id in typ:
   2007             if len(index[id]) > 30:
   2008                 continue
   2009             if id[0] != letter:
   2010                 if letter == None or count > 200:
   2011                     if letter != None:
   2012                         output.write("      </letter>\n")
   2013                         output.write("    </chunk>\n")
   2014                         count = 0
   2015                         chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
   2016                     output.write("    <chunk name='chunk%s'>\n" % (chunk))
   2017                     first_letter = id[0]
   2018                     chunk = chunk + 1
   2019                 elif letter != None:
   2020                     output.write("      </letter>\n")
   2021                 letter = id[0]
   2022                 output.write("      <letter name='%s'>\n" % (letter))
   2023             output.write("        <word name='%s'>\n" % (id))
   2024             tokens = index[id];
   2025             tokens.sort()
   2026             tok = None
   2027             for token in tokens:
   2028                 if tok == token:
   2029                     continue
   2030                 tok = token
   2031                 output.write("          <ref name='%s'/>\n" % (token))
   2032                 count = count + 1
   2033             output.write("        </word>\n")
   2034         if letter != None:
   2035             output.write("      </letter>\n")
   2036             output.write("    </chunk>\n")
   2037             if count != 0:
   2038                 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
   2039             output.write("    <chunks>\n")
   2040             for ch in chunks:
   2041                 output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
   2042                              ch[0], ch[1], ch[2]))
   2043             output.write("    </chunks>\n")
   2044 
   2045     def serialize_xrefs(self, output):
   2046         output.write("  <references>\n")
   2047         self.serialize_xrefs_references(output)
   2048         output.write("  </references>\n")
   2049         output.write("  <alpha>\n")
   2050         self.serialize_xrefs_alpha(output)
   2051         output.write("  </alpha>\n")
   2052         output.write("  <constructors>\n")
   2053         self.serialize_xrefs_constructors(output)
   2054         output.write("  </constructors>\n")
   2055         output.write("  <functions>\n")
   2056         self.serialize_xrefs_functions(output)
   2057         output.write("  </functions>\n")
   2058         output.write("  <files>\n")
   2059         self.serialize_xrefs_files(output)
   2060         output.write("  </files>\n")
   2061         output.write("  <index>\n")
   2062         self.serialize_xrefs_index(output)
   2063         output.write("  </index>\n")
   2064 
   2065     def serialize(self):
   2066         filename = "%s-api.xml" % self.name
   2067         print("Saving XML description %s" % (filename))
   2068         output = open(filename, "w")
   2069         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
   2070         output.write("<api name='%s'>\n" % self.name)
   2071         output.write("  <files>\n")
   2072         headers = list(self.headers.keys())
   2073         headers.sort()
   2074         for file in headers:
   2075             self.serialize_exports(output, file)
   2076         output.write("  </files>\n")
   2077         output.write("  <symbols>\n")
   2078         macros = list(self.idx.macros.keys())
   2079         macros.sort()
   2080         for macro in macros:
   2081             self.serialize_macro(output, macro)
   2082         enums = list(self.idx.enums.keys())
   2083         enums.sort()
   2084         for enum in enums:
   2085             self.serialize_enum(output, enum)
   2086         typedefs = list(self.idx.typedefs.keys())
   2087         typedefs.sort()
   2088         for typedef in typedefs:
   2089             self.serialize_typedef(output, typedef)
   2090         variables = list(self.idx.variables.keys())
   2091         variables.sort()
   2092         for variable in variables:
   2093             self.serialize_variable(output, variable)
   2094         functions = list(self.idx.functions.keys())
   2095         functions.sort()
   2096         for function in functions:
   2097             self.serialize_function(output, function)
   2098         output.write("  </symbols>\n")
   2099         output.write("</api>\n")
   2100         output.close()
   2101 
   2102         filename = "%s-refs.xml" % self.name
   2103         print("Saving XML Cross References %s" % (filename))
   2104         output = open(filename, "w")
   2105         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
   2106         output.write("<apirefs name='%s'>\n" % self.name)
   2107         self.serialize_xrefs(output)
   2108         output.write("</apirefs>\n")
   2109         output.close()
   2110 
   2111 
   2112 def rebuild():
   2113     builder = None
   2114     if glob.glob("parser.c") != [] :
   2115         print("Rebuilding API description for libxml2")
   2116         builder = docBuilder("libxml2", [".", "."],
   2117                              ["xmlwin32version.h", "tst.c"])
   2118     elif glob.glob("../parser.c") != [] :
   2119         print("Rebuilding API description for libxml2")
   2120         builder = docBuilder("libxml2", ["..", "../include/libxml"],
   2121                              ["xmlwin32version.h", "tst.c"])
   2122     elif glob.glob("../libxslt/transform.c") != [] :
   2123         print("Rebuilding API description for libxslt")
   2124         builder = docBuilder("libxslt", ["../libxslt"],
   2125                              ["win32config.h", "libxslt.h", "tst.c"])
   2126     else:
   2127         print("rebuild() failed, unable to guess the module")
   2128         return None
   2129     builder.scan()
   2130     builder.analyze()
   2131     builder.serialize()
   2132     if glob.glob("../libexslt/exslt.c") != [] :
   2133         extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
   2134         extra.scan()
   2135         extra.analyze()
   2136         extra.serialize()
   2137     return builder
   2138 
   2139 #
   2140 # for debugging the parser
   2141 #
   2142 def parse(filename):
   2143     parser = CParser(filename)
   2144     idx = parser.parse()
   2145     return idx
   2146 
   2147 if __name__ == "__main__":
   2148     if len(sys.argv) > 1:
   2149         debug = 1
   2150         parse(sys.argv[1])
   2151     else:
   2152         rebuild()
   2153