Home | History | Annotate | Download | only in docmaker
      1 #
      2 #  content.py
      3 #
      4 #    Parse comment blocks to build content blocks (library file).
      5 #
      6 #  Copyright 2002-2018 by
      7 #  David Turner.
      8 #
      9 #  This file is part of the FreeType project, and may only be used,
     10 #  modified, and distributed under the terms of the FreeType project
     11 #  license, LICENSE.TXT.  By continuing to use, modify, or distribute
     12 #  this file you indicate that you have read the license and
     13 #  understand and accept it fully.
     14 
     15 #
     16 # This file contains routines to parse documentation comment blocks,
     17 # building more structured objects out of them.
     18 #
     19 
     20 
     21 from sources import *
     22 from utils   import *
     23 
     24 import string, re
     25 
     26 
#
# Regular expressions to detect code sequences.  `Code sequences' are simply
# code fragments embedded in '{' and '}', as demonstrated in the following
# example.
#
#   {
#     x = y + z;
#     if ( zookoo == 2 )
#     {
#       foobar();
#     }
#   }
#
# Both expressions only match a line that holds nothing but a single brace
# and optional surrounding whitespace.  Group 1 captures the leading
# whitespace; its length is the indentation (`margin') of the brace.
#
# Note that the indentation of the first opening brace and the last closing
# brace should be the same: the parsers in this file end a code sequence at
# the first lone `}' that is indented no deeper than the opening `{'.  The
# code sequence itself should have a larger indentation than the
# surrounding braces.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
     46 
     47 
#
# A regular expression to isolate identifiers from other text.  Two syntax
# forms are supported:
#
#   <name>
#   <name>[<id>]
#
# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries; in the
# index, `<id>' will be appended in parentheses.
#
# For example,
#
#   stem_darkening[autofit]
#
# becomes `stem_darkening (autofit)' in the index.
#
# Group 1 captures the whole identifier, including the optional `[<id>]'
# part.  The pattern is compiled with `re.VERBOSE', so the whitespace
# inside the pattern string is not significant.
#
re_identifier = re.compile( r"""
                              ((?:\w|-)+
                               (?:\[(?:\w|-)+\])?)
                            """, re.VERBOSE )
     69 
     70 
#
# We collect macro names ending in `_H' (group 1), as defined in
# `freetype/config/ftheader.h'.  While outputting the object data, we use
# this info together with the object's file location (group 2) to emit the
# appropriate header file macro and its associated file name before the
# object itself.
#
# Example:
#
#   #define FT_FREETYPE_H <freetype.h>
#
# Here group 1 is `FT_FREETYPE_H' and group 2 is `freetype.h' (the text
# between the angle brackets).  `DocBlock' stores each match in the
# processor's `headers' dictionary, using group 2 as the key and group 1 as
# the value.
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
     83 
     84 
     85 ################################################################
     86 ##
     87 ##  DOC CODE CLASS
     88 ##
     89 ##  The `DocCode' class is used to store source code lines.
     90 ##
     91 ##  `self.lines' contains a set of source code lines that will be dumped as
     92 ##  HTML in a <PRE> tag.
     93 ##
     94 ##  The object is filled line by line by the parser; it strips the leading
     95 ##  `margin' space from each input line before storing it in `self.lines'.
     96 ##
     97 class  DocCode:
     98 
     99     def  __init__( self, margin, lines ):
    100         self.lines = []
    101         self.words = None
    102 
    103         # remove margin spaces
    104         for l in lines:
    105             if string.strip( l[:margin] ) == "":
    106                 l = l[margin:]
    107             self.lines.append( l )
    108 
    109     def  dump( self, prefix = "", width = 60 ):
    110         lines = self.dump_lines( 0, width )
    111         for l in lines:
    112             print( prefix + l )
    113 
    114     def  dump_lines( self, margin = 0, width = 60 ):
    115         result = []
    116         for l in self.lines:
    117             result.append( " " * margin + l )
    118         return result
    119 
    120 
    121 
    122 ################################################################
    123 ##
    124 ##  DOC PARA CLASS
    125 ##
    126 ##  `Normal' text paragraphs are stored in the `DocPara' class.
    127 ##
    128 ##  `self.words' contains the list of words that make up the paragraph.
    129 ##
    130 class  DocPara:
    131 
    132     def  __init__( self, lines ):
    133         self.lines = None
    134         self.words = []
    135         for l in lines:
    136             l = string.strip( l )
    137             self.words.extend( string.split( l ) )
    138 
    139     def  dump( self, prefix = "", width = 60 ):
    140         lines = self.dump_lines( 0, width )
    141         for l in lines:
    142             print( prefix + l )
    143 
    144     def  dump_lines( self, margin = 0, width = 60 ):
    145         cur    = ""  # current line
    146         col    = 0   # current width
    147         result = []
    148 
    149         for word in self.words:
    150             ln = len( word )
    151             if col > 0:
    152                 ln = ln + 1
    153 
    154             if col + ln > width:
    155                 result.append( " " * margin + cur )
    156                 cur = word
    157                 col = len( word )
    158             else:
    159                 if col > 0:
    160                     cur = cur + " "
    161                 cur = cur + word
    162                 col = col + ln
    163 
    164         if col > 0:
    165             result.append( " " * margin + cur )
    166 
    167         return result
    168 
    169 
    170 ################################################################
    171 ##
    172 ##  DOC FIELD CLASS
    173 ##
    174 ##  The `DocField' class stores a list containing either `DocPara' or
    175 ##  `DocCode' objects.  Each DocField object also has an optional `name'
    176 ##  that is used when the object corresponds to a field or value definition.
    177 ##
    178 class  DocField:
    179 
    180     def  __init__( self, name, lines ):
    181         self.name  = name  # can be `None' for normal paragraphs/sources
    182         self.items = []    # list of items
    183 
    184         mode_none  = 0     # start parsing mode
    185         mode_code  = 1     # parsing code sequences
    186         mode_para  = 3     # parsing normal paragraph
    187 
    188         margin     = -1    # current code sequence indentation
    189         cur_lines  = []
    190 
    191         # analyze the markup lines to check whether they contain paragraphs,
    192         # code sequences, or fields definitions
    193         #
    194         start = 0
    195         mode  = mode_none
    196 
    197         for l in lines:
    198             # are we parsing a code sequence?
    199             if mode == mode_code:
    200                 m = re_code_end.match( l )
    201                 if m and len( m.group( 1 ) ) <= margin:
    202                     # that's it, we finished the code sequence
    203                     code = DocCode( 0, cur_lines )
    204                     self.items.append( code )
    205                     margin    = -1
    206                     cur_lines = []
    207                     mode      = mode_none
    208                 else:
    209                     # otherwise continue the code sequence
    210                     cur_lines.append( l[margin:] )
    211             else:
    212                 # start of code sequence?
    213                 m = re_code_start.match( l )
    214                 if m:
    215                     # save current lines
    216                     if cur_lines:
    217                         para = DocPara( cur_lines )
    218                         self.items.append( para )
    219                         cur_lines = []
    220 
    221                     # switch to code extraction mode
    222                     margin = len( m.group( 1 ) )
    223                     mode   = mode_code
    224                 else:
    225                     if not string.split( l ) and cur_lines:
    226                         # if the line is empty, we end the current paragraph,
    227                         # if any
    228                         para = DocPara( cur_lines )
    229                         self.items.append( para )
    230                         cur_lines = []
    231                     else:
    232                         # otherwise, simply add the line to the current
    233                         # paragraph
    234                         cur_lines.append( l )
    235 
    236         if mode == mode_code:
    237             # unexpected end of code sequence
    238             code = DocCode( margin, cur_lines )
    239             self.items.append( code )
    240         elif cur_lines:
    241             para = DocPara( cur_lines )
    242             self.items.append( para )
    243 
    244     def  dump( self, prefix = "" ):
    245         if self.field:
    246             print( prefix + self.field + " ::" )
    247             prefix = prefix + "----"
    248 
    249         first = 1
    250         for p in self.items:
    251             if not first:
    252                 print( "" )
    253             p.dump( prefix )
    254             first = 0
    255 
    256     def  dump_lines( self, margin = 0, width = 60 ):
    257         result = []
    258         nl     = None
    259 
    260         for p in self.items:
    261             if nl:
    262                 result.append( "" )
    263 
    264             result.extend( p.dump_lines( margin, width ) )
    265             nl = 1
    266 
    267         return result
    268 
    269 
#
# A regular expression to detect field definitions.
#
# Examples:
#
#   foo     ::
#   foo.bar ::
#
# Group 1 captures the field name; group 0 (the whole match) covers
# everything up to and including the `::' marker.  Since the pattern is
# compiled with `re.VERBOSE', whitespace inside the pattern string is not
# significant.
#
# Note that the first alternative, `\w*', can match the empty string, so a
# line that starts with a bare `::' is matched too, with an empty name.
#
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )
    287 
    288 
    289 ################################################################
    290 ##
    291 ##  DOC MARKUP CLASS
    292 ##
    293 class  DocMarkup:
    294 
    295     def  __init__( self, tag, lines ):
    296         self.tag    = string.lower( tag )
    297         self.fields = []
    298 
    299         cur_lines = []
    300         field     = None
    301         mode      = 0
    302 
    303         for l in lines:
    304             m = re_field.match( l )
    305             if m:
    306                 # We detected the start of a new field definition.
    307 
    308                 # first, save the current one
    309                 if cur_lines:
    310                     f = DocField( field, cur_lines )
    311                     self.fields.append( f )
    312                     cur_lines = []
    313                     field     = None
    314 
    315                 field     = m.group( 1 )   # record field name
    316                 ln        = len( m.group( 0 ) )
    317                 l         = " " * ln + l[ln:]
    318                 cur_lines = [l]
    319             else:
    320                 cur_lines.append( l )
    321 
    322         if field or cur_lines:
    323             f = DocField( field, cur_lines )
    324             self.fields.append( f )
    325 
    326     def  get_name( self ):
    327         try:
    328             return self.fields[0].items[0].words[0]
    329         except:
    330             return None
    331 
    332     def  dump( self, margin ):
    333         print( " " * margin + "<" + self.tag + ">" )
    334         for f in self.fields:
    335             f.dump( "  " )
    336         print( " " * margin + "</" + self.tag + ">" )
    337 
    338 
    339 ################################################################
    340 ##
    341 ##  DOC CHAPTER CLASS
    342 ##
    343 class  DocChapter:
    344 
    345     def  __init__( self, block ):
    346         self.block    = block
    347         self.sections = []
    348         if block:
    349             self.name  = block.name
    350             self.title = block.get_markup_words( "title" )
    351             self.order = block.get_markup_words( "sections" )
    352         else:
    353             self.name  = "Other"
    354             self.title = string.split( "Miscellaneous" )
    355             self.order = []
    356 
    357 
    358 ################################################################
    359 ##
    360 ##  DOC SECTION CLASS
    361 ##
    362 class  DocSection:
    363 
    364     def  __init__( self, name = "Other" ):
    365         self.name        = name
    366         self.blocks      = {}
    367         self.block_names = []  # ordered block names in section
    368         self.defs        = []
    369         self.abstract    = ""
    370         self.description = ""
    371         self.order       = []
    372         self.title       = "ERROR"
    373         self.chapter     = None
    374 
    375     def  add_def( self, block ):
    376         self.defs.append( block )
    377 
    378     def  add_block( self, block ):
    379         self.block_names.append( block.name )
    380         self.blocks[block.name] = block
    381 
    382     def  process( self ):
    383         # look up one block that contains a valid section description
    384         for block in self.defs:
    385             title = block.get_markup_text( "title" )
    386             if title:
    387                 self.title       = title
    388                 self.abstract    = block.get_markup_words( "abstract" )
    389                 self.description = block.get_markup_items( "description" )
    390                 self.order       = block.get_markup_words_all( "order" )
    391                 return
    392 
    393     def  reorder( self ):
    394         self.block_names = sort_order_list( self.block_names, self.order )
    395 
    396 
    397 ################################################################
    398 ##
    399 ##  CONTENT PROCESSOR CLASS
    400 ##
    401 class  ContentProcessor:
    402 
    403     def  __init__( self ):
    404         """Initialize a block content processor."""
    405         self.reset()
    406 
    407         self.sections = {}    # dictionary of documentation sections
    408         self.section  = None  # current documentation section
    409 
    410         self.chapters = []    # list of chapters
    411 
    412         self.headers  = {}    # dictionary of header macros
    413 
    414     def  set_section( self, section_name ):
    415         """Set current section during parsing."""
    416         if not section_name in self.sections:
    417             section = DocSection( section_name )
    418             self.sections[section_name] = section
    419             self.section                = section
    420         else:
    421             self.section = self.sections[section_name]
    422 
    423     def  add_chapter( self, block ):
    424         chapter = DocChapter( block )
    425         self.chapters.append( chapter )
    426 
    427     def  reset( self ):
    428         """Reset the content processor for a new block."""
    429         self.markups      = []
    430         self.markup       = None
    431         self.markup_lines = []
    432 
    433     def  add_markup( self ):
    434         """Add a new markup section."""
    435         if self.markup and self.markup_lines:
    436 
    437             # get rid of last line of markup if it's empty
    438             marks = self.markup_lines
    439             if len( marks ) > 0 and not string.strip( marks[-1] ):
    440                 self.markup_lines = marks[:-1]
    441 
    442             m = DocMarkup( self.markup, self.markup_lines )
    443 
    444             self.markups.append( m )
    445 
    446             self.markup       = None
    447             self.markup_lines = []
    448 
    449     def  process_content( self, content ):
    450         """Process a block content and return a list of DocMarkup objects
    451            corresponding to it."""
    452         markup       = None
    453         markup_lines = []
    454         first        = 1
    455 
    456         margin  = -1
    457         in_code = 0
    458 
    459         for line in content:
    460             if in_code:
    461                 m = re_code_end.match( line )
    462                 if m and len( m.group( 1 ) ) <= margin:
    463                     in_code = 0
    464                     margin  = -1
    465             else:
    466                 m = re_code_start.match( line )
    467                 if m:
    468                     in_code = 1
    469                     margin  = len( m.group( 1 ) )
    470 
    471             found = None
    472 
    473             if not in_code:
    474                 for t in re_markup_tags:
    475                     m = t.match( line )
    476                     if m:
    477                         found  = string.lower( m.group( 1 ) )
    478                         prefix = len( m.group( 0 ) )
    479                         # remove markup from line
    480                         line   = " " * prefix + line[prefix:]
    481                         break
    482 
    483             # is it the start of a new markup section ?
    484             if found:
    485                 first = 0
    486                 self.add_markup()  # add current markup content
    487                 self.markup = found
    488                 if len( string.strip( line ) ) > 0:
    489                     self.markup_lines.append( line )
    490             elif first == 0:
    491                 self.markup_lines.append( line )
    492 
    493         self.add_markup()
    494 
    495         return self.markups
    496 
    497     def  parse_sources( self, source_processor ):
    498         blocks = source_processor.blocks
    499         count  = len( blocks )
    500 
    501         for n in range( count ):
    502             source = blocks[n]
    503             if source.content:
    504                 # this is a documentation comment, we need to catch
    505                 # all following normal blocks in the "follow" list
    506                 #
    507                 follow = []
    508                 m = n + 1
    509                 while m < count and not blocks[m].content:
    510                     follow.append( blocks[m] )
    511                     m = m + 1
    512 
    513                 doc_block = DocBlock( source, follow, self )
    514 
    515     def  finish( self ):
    516         # process all sections to extract their abstract, description
    517         # and ordered list of items
    518         #
    519         for sec in self.sections.values():
    520             sec.process()
    521 
    522         # process chapters to check that all sections are correctly
    523         # listed there
    524         for chap in self.chapters:
    525             for sec in chap.order:
    526                 if sec in self.sections:
    527                     section = self.sections[sec]
    528                     section.chapter = chap
    529                     section.reorder()
    530                     chap.sections.append( section )
    531                 else:
    532                     sys.stderr.write( "WARNING: chapter '" +          \
    533                         chap.name + "' in " + chap.block.location() + \
    534                         " lists unknown section '" + sec + "'\n" )
    535 
    536         # check that all sections are in a chapter
    537         #
    538         others = []
    539         for sec in self.sections.values():
    540             if not sec.chapter:
    541                 sec.reorder()
    542                 others.append( sec )
    543 
    544         # create a new special chapter for all remaining sections
    545         # when necessary
    546         #
    547         if others:
    548             chap = DocChapter( None )
    549             chap.sections = others
    550             self.chapters.append( chap )
    551 
    552 
    553 ################################################################
    554 ##
    555 ##  DOC BLOCK CLASS
    556 ##
    557 class  DocBlock:
    558 
    559     def  __init__( self, source, follow, processor ):
    560         processor.reset()
    561 
    562         self.source  = source
    563         self.code    = []
    564         self.type    = "ERRTYPE"
    565         self.name    = "ERRNAME"
    566         self.section = processor.section
    567         self.markups = processor.process_content( source.content )
    568 
    569         # compute block type from first markup tag
    570         try:
    571             self.type = self.markups[0].tag
    572         except:
    573             pass
    574 
    575         # compute block name from first markup paragraph
    576         try:
    577             markup = self.markups[0]
    578             para   = markup.fields[0].items[0]
    579             name   = para.words[0]
    580             m = re_identifier.match( name )
    581             if m:
    582                 name = m.group( 1 )
    583             self.name = name
    584         except:
    585             pass
    586 
    587         if self.type == "section":
    588             # detect new section starts
    589             processor.set_section( self.name )
    590             processor.section.add_def( self )
    591         elif self.type == "chapter":
    592             # detect new chapter
    593             processor.add_chapter( self )
    594         else:
    595             processor.section.add_block( self )
    596 
    597         # now, compute the source lines relevant to this documentation
    598         # block. We keep normal comments in for obvious reasons (??)
    599         source = []
    600         for b in follow:
    601             if b.format:
    602                 break
    603             for l in b.lines:
    604                 # collect header macro definitions
    605                 m = re_header_macro.match( l )
    606                 if m:
    607                     processor.headers[m.group( 2 )] = m.group( 1 );
    608 
    609                 # we use "/* */" as a separator
    610                 if re_source_sep.match( l ):
    611                     break
    612                 source.append( l )
    613 
    614         # now strip the leading and trailing empty lines from the sources
    615         start = 0
    616         end   = len( source ) - 1
    617 
    618         while start < end and not string.strip( source[start] ):
    619             start = start + 1
    620 
    621         while start < end and not string.strip( source[end] ):
    622             end = end - 1
    623 
    624         if start == end and not string.strip( source[start] ):
    625             self.code = []
    626         else:
    627             self.code = source[start:end + 1]
    628 
    629     def  location( self ):
    630         return self.source.location()
    631 
    632     def  get_markup( self, tag_name ):
    633         """Return the DocMarkup corresponding to a given tag in a block."""
    634         for m in self.markups:
    635             if m.tag == string.lower( tag_name ):
    636                 return m
    637         return None
    638 
    639     def  get_markup_words( self, tag_name ):
    640         try:
    641             m = self.get_markup( tag_name )
    642             return m.fields[0].items[0].words
    643         except:
    644             return []
    645 
    646     def  get_markup_words_all( self, tag_name ):
    647         try:
    648             m = self.get_markup( tag_name )
    649             words = []
    650             for item in m.fields[0].items:
    651                 # We honour empty lines in an `<Order>' section element by
    652                 # adding the sentinel `/empty/'.  The formatter should then
    653                 # convert it to an appropriate representation in the
    654                 # `section_enter' function.
    655                 words += item.words
    656                 words.append( "/empty/" )
    657             return words
    658         except:
    659             return []
    660 
    661     def  get_markup_text( self, tag_name ):
    662         result = self.get_markup_words( tag_name )
    663         return string.join( result )
    664 
    665     def  get_markup_items( self, tag_name ):
    666         try:
    667             m = self.get_markup( tag_name )
    668             return m.fields[0].items
    669         except:
    670             return None
    671 
    672 # eof
    673