Home | History | Annotate | Download | only in docmaker
      1 #
      2 #  sources.py
      3 #
      4 #    Convert source code comments to multi-line blocks (library file).
      5 #
      6 #  Copyright 2002-2018 by
      7 #  David Turner.
      8 #
      9 #  This file is part of the FreeType project, and may only be used,
     10 #  modified, and distributed under the terms of the FreeType project
     11 #  license, LICENSE.TXT.  By continuing to use, modify, or distribute
     12 #  this file you indicate that you have read the license and
     13 #  understand and accept it fully.
     14 
     15 #
     16 # This library file contains definitions of classes needed to decompose C
     17 # source code files into a series of multi-line `blocks'.  There are two
     18 # kinds of blocks.
     19 #
     20 #   - Normal blocks, which contain source code or ordinary comments.
     21 #
#   - Documentation blocks, which have restricted formatting, and whose text
#     always starts with a documentation markup tag like `<Function>',
#     `<Type>', etc.
     25 #
     26 # The routines to process the content of documentation blocks are contained
     27 # in file `content.py'; the classes and methods found here only deal with
     28 # text parsing and basic documentation block extraction.
     29 #
     30 
     31 
     32 import fileinput, re, string
     33 
     34 
     35 ################################################################
     36 ##
     37 ##  SOURCE BLOCK FORMAT CLASS
     38 ##
     39 ##  A simple class containing compiled regular expressions to detect
     40 ##  potential documentation format block comments within C source code.
     41 ##
     42 ##  The `column' pattern must contain a group to `unbox' the content of
     43 ##  documentation comment blocks.
     44 ##
     45 ##  Later on, paragraphs are converted to long lines, which simplifies the
     46 ##  regular expressions that act upon the text.
     47 ##
     48 class  SourceBlockFormat:
     49 
     50     def  __init__( self, id, start, column, end ):
     51         """Create a block pattern, used to recognize special documentation
     52            blocks."""
     53         self.id     = id
     54         self.start  = re.compile( start, re.VERBOSE )
     55         self.column = re.compile( column, re.VERBOSE )
     56         self.end    = re.compile( end, re.VERBOSE )
     57 
     58 
#
# Format 1 documentation comment blocks.
#
#    /************************************/ (at least 2 asterisks)
#    /*                                  */
#    /*                                  */
#    /*                                  */
#    /************************************/ (at least 2 asterisks)
#
# The opening and the closing decoration line have the same shape, which is
# why `start' is passed a second time as the `end' pattern below.
#

# Opening (and closing) line: `/', two or more asterisks, `/', and nothing
# else apart from surrounding whitespace.
start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

# Content line: `/*' ... `*/'.  Group 1 holds everything between the opening
# `/*' and the last `*/' on the line; its first character must not be an
# asterisk.
column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
     83 
     84 
#
# Format 2 documentation comment blocks.
#
#    /************************************ (at least 2 asterisks)
#     *
#     *                                    (1 asterisk)
#     *
#     */                                   (1 or more asterisks)
#
# Note that the names `start' and `column' are rebound here; the format 1
# object above has already compiled its own copies.
#

# Opening line: `/' and two or more asterisks, with nothing else apart from
# surrounding whitespace (in particular, no closing `/').
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

# Content line: a single leading asterisk; group 1 is the rest of the line.
# The negative look-ahead rejects a following `*' or `/', so that lines like
# `*/' and `**/' are not taken for content lines.
column = r'''
  \s*           # any number of whitespace
  \*{1}(?![*/]) # followed by precisely one asterisk not followed by `/'
  (.*)          # then anything (group1)
'''

# Closing line: one or more asterisks followed by `/'.  Anything after the
# `/' is not checked by this pattern.
end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
    112 
    113 
#
# The list of supported documentation block formats.  We could add new ones
# quite easily.
#
# `SourceProcessor.process_normal_line' tests the `start' pattern of every
# entry against each normal line; it does not stop at the first hit, so if
# several entries matched the same line, the last one in this list would be
# the one that stays selected.
#
re_source_block_formats = [re_source_block_format1, re_source_block_format2]
    119 
    120 
#
# The following regular expressions correspond to markup tags within the
# documentation comment blocks.  They are equivalent despite their different
# syntax.
#
# A markup tag name, to be found in group 1, consists of word characters
# (letters, digits, `_') or the character `-'.  Both patterns allow leading
# whitespace, and both accept an empty name (i.e., `<>' and `@:' match).
#
# Notice that a markup tag _must_ begin a new paragraph.  `SourceBlock'
# applies these patterns with `match' to each stripped comment line, so a
# tag is only recognized at the start of a line.
#
re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format

#
# The list of supported markup tags.  We could add new ones quite easily.
#
re_markup_tags = [re_markup_tag1, re_markup_tag2]
    137 
    138 
#
# A regular expression to detect a cross reference, after markup tags have
# been stripped off.
#
# Two syntax forms are supported:
#
#   @<name>
#   @<name>[<id>]
#
# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries.
#
# Example: @foo[bar]
#
# The named group `name' captures the name together with the optional
# `[<id>]' suffix (`foo[bar]' in the example above, without the leading
# `@'); the named group `rest' captures whatever follows on the line.
#
re_crossref = re.compile( r"""
                            @
                            (?P<name>(?:\w|-)+
                                     (?:\[(?:\w|-)+\])?)
                            (?P<rest>.*)
                          """, re.VERBOSE )
    159 
#
# Two regular expressions to detect italic and bold markup, respectively.
# Group 1 is the marked-up word without its delimiters, group 2 the rest of
# the line after the closing delimiter.
#
# Note that the markup is limited to words consisting of letters, digits,
# the characters `_' and `-', or an apostrophe (but not as the first
# character).
#
# Neither pattern is anchored on its own; whether the markup has to sit at
# the start of the examined string depends on the caller using `match' or
# `search'.
#
re_italic = re.compile( r"_((?:\w|-)(?:\w|'|-)*)_(.*)" )     #  _italic_
re_bold   = re.compile( r"\*((?:\w|-)(?:\w|'|-)*)\*(.*)" )   #  *bold*
    170 
    171 #
    172 # This regular expression code to identify an URL has been taken from
    173 #
    174 #   https://mail.python.org/pipermail/tutor/2002-September/017228.html
    175 #
    176 # (with slight modifications).
    177 #
    178 urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
    179 ltrs = r'\w'
    180 gunk = r'/#~:.?+=&%@!\-'
    181 punc = r'.:?\-'
    182 any  = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
    183                                       'gunk' : gunk,
    184                                       'punc' : punc }
    185 url  = r"""
    186          (
    187            \b                    # start at word boundary
    188            %(urls)s :            # need resource and a colon
    189            [%(any)s] +?          # followed by one or more of any valid
    190                                  # character, but be conservative and
    191                                  # take only what you need to...
    192            (?=                   # [look-ahead non-consumptive assertion]
    193              [%(punc)s]*         # either 0 or more punctuation
    194              (?:                 # [non-grouping parentheses]
    195                [^%(any)s] | $    # followed by a non-url char
    196                                  # or end of the string
    197              )
    198            )
    199          )
    200         """ % {'urls' : urls,
    201                'any'  : any,
    202                'punc' : punc }
    203 
    204 re_url = re.compile( url, re.VERBOSE | re.MULTILINE )
    205 
#
# A regular expression that stops collection of comments for the current
# block.
#
# It matches a comment that is empty apart from whitespace (`/* */'),
# optionally preceded by whitespace.  The pattern is not used within this
# file.  NOTE(review): presumably the content parser applies it -- confirm
# against its caller.
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )   #  /* */
    211 
#
# A regular expression to find possible C identifiers while outputting
# source code verbatim, covering things like `*foo' or `(bar'.  Group 1 is
# the prefix, group 2 the identifier -- since we scan lines from left to
# right, sequentially splitting the source code into prefix and identifier
# is fully sufficient for our purposes.
#
# Both groups may be empty (a run of non-word characters followed by a run
# of word characters), so the pattern also matches the empty string.
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )
    220 
    221 #
    222 # A regular expression that matches a list of reserved C source keywords.
    223 #
    224 re_source_keywords = re.compile( '''\\b ( typedef   |
    225                                           struct    |
    226                                           enum      |
    227                                           union     |
    228                                           const     |
    229                                           char      |
    230                                           int       |
    231                                           short     |
    232                                           long      |
    233                                           void      |
    234                                           signed    |
    235                                           unsigned  |
    236                                           \#include |
    237                                           \#define  |
    238                                           \#undef   |
    239                                           \#if      |
    240                                           \#ifdef   |
    241                                           \#ifndef  |
    242                                           \#else    |
    243                                           \#endif   ) \\b''', re.VERBOSE )
    244 
    245 
    246 ################################################################
    247 ##
    248 ##  SOURCE BLOCK CLASS
    249 ##
    250 ##  There are two important fields in a `SourceBlock' object.
    251 ##
    252 ##    self.lines
    253 ##      A list of text lines for the corresponding block.
    254 ##
##    self.content
##      For documentation comment blocks only, this is the block content
##      that has been `unboxed' from its decoration.  This is an empty list
##      for all other blocks (i.e., sources or ordinary comments with no
##      starting markup tag).
    260 ##
    261 class  SourceBlock:
    262 
    263     def  __init__( self, processor, filename, lineno, lines ):
    264         self.processor = processor
    265         self.filename  = filename
    266         self.lineno    = lineno
    267         self.lines     = lines[:]
    268         self.format    = processor.format
    269         self.content   = []
    270 
    271         if self.format == None:
    272             return
    273 
    274         words = []
    275 
    276         # extract comment lines
    277         lines = []
    278 
    279         for line0 in self.lines:
    280             m = self.format.column.match( line0 )
    281             if m:
    282                 lines.append( m.group( 1 ) )
    283 
    284         # now, look for a markup tag
    285         for l in lines:
    286             l = string.strip( l )
    287             if len( l ) > 0:
    288                 for tag in re_markup_tags:
    289                     if tag.match( l ):
    290                         self.content = lines
    291                         return
    292 
    293     def  location( self ):
    294         return "(" + self.filename + ":" + repr( self.lineno ) + ")"
    295 
    296     # debugging only -- not used in normal operations
    297     def  dump( self ):
    298         if self.content:
    299             print( "{{{content start---" )
    300             for l in self.content:
    301                 print( l )
    302             print( "---content end}}}" )
    303             return
    304 
    305         fmt = ""
    306         if self.format:
    307             fmt = repr( self.format.id ) + " "
    308 
    309         for line in self.lines:
    310             print( line )
    311 
    312 
    313 ################################################################
    314 ##
    315 ##  SOURCE PROCESSOR CLASS
    316 ##
    317 ##  The `SourceProcessor' is in charge of reading a C source file and
    318 ##  decomposing it into a series of different `SourceBlock' objects.
    319 ##
##  A SourceBlock object holds one of the following.
    321 ##
    322 ##    - A documentation comment block using one of the layouts above.  Its
    323 ##      exact format will be discussed later.
    324 ##
    325 ##    - Normal sources lines, including comments.
    326 ##
    327 ##
    328 class  SourceProcessor:
    329 
    330     def  __init__( self ):
    331         """Initialize a source processor."""
    332         self.blocks   = []
    333         self.filename = None
    334         self.format   = None
    335         self.lines    = []
    336 
    337     def  reset( self ):
    338         """Reset a block processor and clean up all its blocks."""
    339         self.blocks = []
    340         self.format = None
    341 
    342     def  parse_file( self, filename ):
    343         """Parse a C source file and add its blocks to the processor's
    344            list."""
    345         self.reset()
    346 
    347         self.filename = filename
    348 
    349         fileinput.close()
    350         self.format = None
    351         self.lineno = 0
    352         self.lines  = []
    353 
    354         for line in fileinput.input( filename ):
    355             # strip trailing newlines, important on Windows machines!
    356             if line[-1] == '\012':
    357                 line = line[0:-1]
    358 
    359             if self.format == None:
    360                 self.process_normal_line( line )
    361             else:
    362                 if self.format.end.match( line ):
    363                     # A normal block end.  Add it to `lines' and create a
    364                     # new block
    365                     self.lines.append( line )
    366                     self.add_block_lines()
    367                 elif self.format.column.match( line ):
    368                     # A normal column line.  Add it to `lines'.
    369                     self.lines.append( line )
    370                 else:
    371                     # An unexpected block end.  Create a new block, but
    372                     # don't process the line.
    373                     self.add_block_lines()
    374 
    375                     # we need to process the line again
    376                     self.process_normal_line( line )
    377 
    378         # record the last lines
    379         self.add_block_lines()
    380 
    381     def  process_normal_line( self, line ):
    382         """Process a normal line and check whether it is the start of a new
    383            block."""
    384         for f in re_source_block_formats:
    385             if f.start.match( line ):
    386                 self.add_block_lines()
    387                 self.format = f
    388                 self.lineno = fileinput.filelineno()
    389 
    390         self.lines.append( line )
    391 
    392     def  add_block_lines( self ):
    393         """Add the current accumulated lines and create a new block."""
    394         if self.lines != []:
    395             block = SourceBlock( self,
    396                                  self.filename,
    397                                  self.lineno,
    398                                  self.lines )
    399 
    400             self.blocks.append( block )
    401             self.format = None
    402             self.lines  = []
    403 
    404     # debugging only, not used in normal operations
    405     def  dump( self ):
    406         """Print all blocks in a processor."""
    407         for b in self.blocks:
    408             b.dump()
    409 
    410 # eof
    411