Home | History | Annotate | Download | only in src
      1 #!/usr/bin/env python2
      2 """Doxygen XML to SWIG docstring converter.
      3 
      4 Usage:
      5 
      6   doxy2swig.py [options] input.xml output.i
      7 
      8 Converts Doxygen generated XML files into a file containing docstrings
      9 that can be used by SWIG-1.3.x.  Note that you need to get SWIG
     10 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
     11 the resulting output.
     12 
     13 input.xml is your doxygen generated XML file and output.i is where the
     14 output will be written (the file will be clobbered).
     15 
     16 """
     17 #
     18 #
     19 # This code is implemented using Mark Pilgrim's code as a guideline:
     20 #   http://www.faqs.org/docs/diveintopython/kgp_divein.html
     21 #
     22 # Author: Prabhu Ramachandran
     23 # License: BSD style
     24 #
     25 # Thanks:
     26 #   Johan Hake:  the include_function_definition feature
     27 #   Bill Spotz:  bug reports and testing.
     28 #   Sebastian Henschel:   Misc. enhancements.
     29 #
     30 #
     31 
     32 from xml.dom import minidom
     33 import re
     34 import textwrap
     35 import sys
     36 import os.path
     37 import optparse
     38 
     39 
     40 def my_open_read(source):
     41     if hasattr(source, "read"):
     42         return source
     43     else:
     44         return open(source)
     45 
     46 
     47 def my_open_write(dest):
     48     if hasattr(dest, "write"):
     49         return dest
     50     else:
     51         return open(dest, 'w')
     52 
     53 
     54 class Doxy2SWIG:
     55 
     56     """Converts Doxygen generated XML files into a file containing
     57     docstrings that can be used by SWIG-1.3.x that have support for
     58     feature("docstring").  Once the data is parsed it is stored in
     59     self.pieces.
     60 
     61     """
     62 
     63     def __init__(self, src, include_function_definition=True, quiet=False):
     64         """Initialize the instance given a source object.  `src` can
     65         be a file or filename.  If you do not want to include function
     66         definitions from doxygen then set
     67         `include_function_definition` to `False`.  This is handy since
     68         this allows you to use the swig generated function definition
     69         using %feature("autodoc", [0,1]).
     70 
     71         """
     72         f = my_open_read(src)
     73         self.my_dir = os.path.dirname(f.name)
     74         self.xmldoc = minidom.parse(f).documentElement
     75         f.close()
     76 
     77         self.pieces = []
     78         self.pieces.append('\n// File: %s\n' %
     79                            os.path.basename(f.name))
     80 
     81         self.space_re = re.compile(r'\s+')
     82         self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
     83         self.multi = 0
     84         self.ignores = ['inheritancegraph', 'param', 'listofallmembers',
     85                         'innerclass', 'name', 'declname', 'incdepgraph',
     86                         'invincdepgraph', 'programlisting', 'type',
     87                         'references', 'referencedby', 'location',
     88                         'collaborationgraph', 'reimplements',
     89                         'reimplementedby', 'derivedcompoundref',
     90                         'basecompoundref']
     91         #self.generics = []
     92         self.include_function_definition = include_function_definition
     93         if not include_function_definition:
     94             self.ignores.append('argsstring')
     95 
     96         self.quiet = quiet
     97 
     98     def generate(self):
     99         """Parses the file set in the initialization.  The resulting
    100         data is stored in `self.pieces`.
    101 
    102         """
    103         self.parse(self.xmldoc)
    104 
    105     def parse(self, node):
    106         """Parse a given node.  This function in turn calls the
    107         `parse_<nodeType>` functions which handle the respective
    108         nodes.
    109 
    110         """
    111         pm = getattr(self, "parse_%s" % node.__class__.__name__)
    112         pm(node)
    113 
    114     def parse_Document(self, node):
    115         self.parse(node.documentElement)
    116 
    117     def parse_Text(self, node):
    118         txt = node.data
    119         txt = txt.replace('\\', r'\\\\')
    120         txt = txt.replace('"', r'\"')
    121         # ignore pure whitespace
    122         m = self.space_re.match(txt)
    123         if m and len(m.group()) == len(txt):
    124             pass
    125         else:
    126             self.add_text(textwrap.fill(txt, break_long_words=False))
    127 
    128     def parse_Element(self, node):
    129         """Parse an `ELEMENT_NODE`.  This calls specific
    130         `do_<tagName>` handers for different elements.  If no handler
    131         is available the `generic_parse` method is called.  All
    132         tagNames specified in `self.ignores` are simply ignored.
    133 
    134         """
    135         name = node.tagName
    136         ignores = self.ignores
    137         if name in ignores:
    138             return
    139         attr = "do_%s" % name
    140         if hasattr(self, attr):
    141             handlerMethod = getattr(self, attr)
    142             handlerMethod(node)
    143         else:
    144             self.generic_parse(node)
    145             #if name not in self.generics: self.generics.append(name)
    146 
    147     def parse_Comment(self, node):
    148         """Parse a `COMMENT_NODE`.  This does nothing for now."""
    149         return
    150 
    151     def add_text(self, value):
    152         """Adds text corresponding to `value` into `self.pieces`."""
    153         if isinstance(value, (list, tuple)):
    154             self.pieces.extend(value)
    155         else:
    156             self.pieces.append(value)
    157 
    158     def get_specific_nodes(self, node, names):
    159         """Given a node and a sequence of strings in `names`, return a
    160         dictionary containing the names as keys and child
    161         `ELEMENT_NODEs`, that have a `tagName` equal to the name.
    162 
    163         """
    164         nodes = [(x.tagName, x) for x in node.childNodes
    165                  if x.nodeType == x.ELEMENT_NODE and
    166                  x.tagName in names]
    167         return dict(nodes)
    168 
    169     def generic_parse(self, node, pad=0):
    170         """A Generic parser for arbitrary tags in a node.
    171 
    172         Parameters:
    173 
    174          - node:  A node in the DOM.
    175          - pad: `int` (default: 0)
    176 
    177            If 0 the node data is not padded with newlines.  If 1 it
    178            appends a newline after parsing the childNodes.  If 2 it
    179            pads before and after the nodes are processed.  Defaults to
    180            0.
    181 
    182         """
    183         npiece = 0
    184         if pad:
    185             npiece = len(self.pieces)
    186             if pad == 2:
    187                 self.add_text('\n')
    188         for n in node.childNodes:
    189             self.parse(n)
    190         if pad:
    191             if len(self.pieces) > npiece:
    192                 self.add_text('\n')
    193 
    194     def space_parse(self, node):
    195         self.add_text(' ')
    196         self.generic_parse(node)
    197 
    198     do_ref = space_parse
    199     do_emphasis = space_parse
    200     do_bold = space_parse
    201     do_computeroutput = space_parse
    202     do_formula = space_parse
    203 
    204     def do_compoundname(self, node):
    205         self.add_text('\n\n')
    206         data = node.firstChild.data
    207         self.add_text('%%feature("docstring") %s "\n' % data)
    208 
    209     def do_compounddef(self, node):
    210         kind = node.attributes['kind'].value
    211         if kind in ('class', 'struct'):
    212             prot = node.attributes['prot'].value
    213             if prot != 'public':
    214                 return
    215             names = ('compoundname', 'briefdescription',
    216                      'detaileddescription', 'includes')
    217             first = self.get_specific_nodes(node, names)
    218             for n in names:
    219                 if first.has_key(n):
    220                     self.parse(first[n])
    221             self.add_text(['";', '\n'])
    222             for n in node.childNodes:
    223                 if n not in first.values():
    224                     self.parse(n)
    225         elif kind in ('file', 'namespace'):
    226             nodes = node.getElementsByTagName('sectiondef')
    227             for n in nodes:
    228                 self.parse(n)
    229 
    230     def do_includes(self, node):
    231         self.add_text('C++ includes: ')
    232         self.generic_parse(node, pad=1)
    233 
    234     def do_parameterlist(self, node):
    235         text = 'unknown'
    236         for key, val in node.attributes.items():
    237             if key == 'kind':
    238                 if val == 'param':
    239                     text = 'Parameters'
    240                 elif val == 'exception':
    241                     text = 'Exceptions'
    242                 elif val == 'retval':
    243                     text = 'Returns'
    244                 else:
    245                     text = val
    246                 break
    247         self.add_text(['\n', '\n', text, ':', '\n'])
    248         self.generic_parse(node, pad=1)
    249 
    250     def do_para(self, node):
    251         self.add_text('\n')
    252         self.generic_parse(node, pad=1)
    253 
    254     def do_parametername(self, node):
    255         self.add_text('\n')
    256         try:
    257             data = node.firstChild.data
    258         except AttributeError:  # perhaps a <ref> tag in it
    259             data = node.firstChild.firstChild.data
    260         if data.find('Exception') != -1:
    261             self.add_text(data)
    262         else:
    263             self.add_text("%s: " % data)
    264 
    265     def do_parameterdefinition(self, node):
    266         self.generic_parse(node, pad=1)
    267 
    268     def do_detaileddescription(self, node):
    269         self.generic_parse(node, pad=1)
    270 
    271     def do_briefdescription(self, node):
    272         self.generic_parse(node, pad=1)
    273 
    274     def do_memberdef(self, node):
    275         prot = node.attributes['prot'].value
    276         id = node.attributes['id'].value
    277         kind = node.attributes['kind'].value
    278         tmp = node.parentNode.parentNode.parentNode
    279         compdef = tmp.getElementsByTagName('compounddef')[0]
    280         cdef_kind = compdef.attributes['kind'].value
    281 
    282         if prot == 'public':
    283             first = self.get_specific_nodes(node, ('definition', 'name'))
    284             name = first['name'].firstChild.data
    285             if name[:8] == 'operator':  # Don't handle operators yet.
    286                 return
    287 
    288             if not 'definition' in first or \
    289                    kind in ['variable', 'typedef']:
    290                 return
    291 
    292             if self.include_function_definition:
    293                 defn = first['definition'].firstChild.data
    294             else:
    295                 defn = ""
    296             self.add_text('\n')
    297             self.add_text('%feature("docstring") ')
    298 
    299             anc = node.parentNode.parentNode
    300             if cdef_kind in ('file', 'namespace'):
    301                 ns_node = anc.getElementsByTagName('innernamespace')
    302                 if not ns_node and cdef_kind == 'namespace':
    303                     ns_node = anc.getElementsByTagName('compoundname')
    304                 if ns_node:
    305                     ns = ns_node[0].firstChild.data
    306                     self.add_text(' %s::%s "\n%s' % (ns, name, defn))
    307                 else:
    308                     self.add_text(' %s "\n%s' % (name, defn))
    309             elif cdef_kind in ('class', 'struct'):
    310                 # Get the full function name.
    311                 anc_node = anc.getElementsByTagName('compoundname')
    312                 cname = anc_node[0].firstChild.data
    313                 self.add_text(' %s::%s "\n%s' % (cname, name, defn))
    314 
    315             for n in node.childNodes:
    316                 if n not in first.values():
    317                     self.parse(n)
    318             self.add_text(['";', '\n'])
    319 
    320     def do_definition(self, node):
    321         data = node.firstChild.data
    322         self.add_text('%s "\n%s' % (data, data))
    323 
    324     def do_sectiondef(self, node):
    325         kind = node.attributes['kind'].value
    326         if kind in ('public-func', 'func', 'user-defined', ''):
    327             self.generic_parse(node)
    328 
    329     def do_header(self, node):
    330         """For a user defined section def a header field is present
    331         which should not be printed as such, so we comment it in the
    332         output."""
    333         data = node.firstChild.data
    334         self.add_text('\n/*\n %s \n*/\n' % data)
    335         # If our immediate sibling is a 'description' node then we
    336         # should comment that out also and remove it from the parent
    337         # node's children.
    338         parent = node.parentNode
    339         idx = parent.childNodes.index(node)
    340         if len(parent.childNodes) >= idx + 2:
    341             nd = parent.childNodes[idx + 2]
    342             if nd.nodeName == 'description':
    343                 nd = parent.removeChild(nd)
    344                 self.add_text('\n/*')
    345                 self.generic_parse(nd)
    346                 self.add_text('\n*/\n')
    347 
    348     def do_simplesect(self, node):
    349         kind = node.attributes['kind'].value
    350         if kind in ('date', 'rcs', 'version'):
    351             pass
    352         elif kind == 'warning':
    353             self.add_text(['\n', 'WARNING: '])
    354             self.generic_parse(node)
    355         elif kind == 'see':
    356             self.add_text('\n')
    357             self.add_text('See: ')
    358             self.generic_parse(node)
    359         else:
    360             self.generic_parse(node)
    361 
    362     def do_argsstring(self, node):
    363         self.generic_parse(node, pad=1)
    364 
    365     def do_member(self, node):
    366         kind = node.attributes['kind'].value
    367         refid = node.attributes['refid'].value
    368         if kind == 'function' and refid[:9] == 'namespace':
    369             self.generic_parse(node)
    370 
    371     def do_doxygenindex(self, node):
    372         self.multi = 1
    373         comps = node.getElementsByTagName('compound')
    374         for c in comps:
    375             refid = c.attributes['refid'].value
    376             fname = refid + '.xml'
    377             if not os.path.exists(fname):
    378                 fname = os.path.join(self.my_dir,  fname)
    379             if not self.quiet:
    380                 print("parsing file: %s" % fname)
    381             p = Doxy2SWIG(fname, self.include_function_definition, self.quiet)
    382             p.generate()
    383             self.pieces.extend(self.clean_pieces(p.pieces))
    384 
    385     def write(self, fname):
    386         o = my_open_write(fname)
    387         if self.multi:
    388             o.write("".join(x.encode('utf-8') for x in self.pieces))
    389         else:
    390             o.write("".join(self.clean_pieces(self.pieces)))
    391         o.close()
    392 
    393     def clean_pieces(self, pieces):
    394         """Cleans the list of strings given as `pieces`.  It replaces
    395         multiple newlines by a maximum of 2 and returns a new list.
    396         It also wraps the paragraphs nicely.
    397 
    398         """
    399         ret = []
    400         count = 0
    401         for i in pieces:
    402             if i == '\n':
    403                 count = count + 1
    404             else:
    405                 if i == '";':
    406                     if count:
    407                         ret.append('\n')
    408                 elif count > 2:
    409                     ret.append('\n\n')
    410                 elif count:
    411                     ret.append('\n' * count)
    412                 count = 0
    413                 ret.append(i)
    414 
    415         _data = "".join(ret)
    416         ret = []
    417         for i in _data.split('\n\n'):
    418             if i == 'Parameters:' or i == 'Exceptions:' or i == 'Returns:':
    419                 ret.extend([i, '\n' + '-' * len(i), '\n\n'])
    420             elif i.find('// File:') > -1:  # leave comments alone.
    421                 ret.extend([i, '\n'])
    422             else:
    423                 _tmp = textwrap.fill(i.strip(), break_long_words=False)
    424                 _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
    425                 ret.extend([_tmp, '\n\n'])
    426         return ret
    427 
    428 
    429 def convert(input, output, include_function_definition=True, quiet=False):
    430     p = Doxy2SWIG(input, include_function_definition, quiet)
    431     p.generate()
    432     p.write(output)
    433 
    434 
    435 def main():
    436     usage = __doc__
    437     parser = optparse.OptionParser(usage)
    438     parser.add_option("-n", '--no-function-definition',
    439                       action='store_true',
    440                       default=False,
    441                       dest='func_def',
    442                       help='do not include doxygen function definitions')
    443     parser.add_option("-q", '--quiet',
    444                       action='store_true',
    445                       default=False,
    446                       dest='quiet',
    447                       help='be quiet and minimize output')
    448 
    449     options, args = parser.parse_args()
    450     if len(args) != 2:
    451         parser.error("error: no input and output specified")
    452 
    453     convert(args[0], args[1], not options.func_def, options.quiet)
    454 
    455 
# Run the command-line converter only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
    458