Home | History | Annotate | Download | only in extensions
      1 #!/usr/bin/python
      2 
      3 """
      4 HeaderID Extension for Python-Markdown
      5 ======================================
      6 
      7 Adds ability to set HTML IDs for headers.
      8 
      9 Basic usage:
     10 
     11     >>> import markdown
     12     >>> text = "# Some Header # {#some_id}"
     13     >>> md = markdown.markdown(text, ['headerid'])
     14     >>> md
     15     u'<h1 id="some_id">Some Header</h1>'
     16 
     17 All header IDs are unique:
     18 
     19     >>> text = '''
     20     ... #Header
     21     ... #Another Header {#header}
     22     ... #Third Header {#header}'''
     23     >>> md = markdown.markdown(text, ['headerid'])
     24     >>> md
     25     u'<h1 id="header">Header</h1>\\n<h1 id="header_1">Another Header</h1>\\n<h1 id="header_2">Third Header</h1>'
     26 
     27 To fit within a html template's hierarchy, set the header base level:
     28 
     29     >>> text = '''
     30     ... #Some Header
     31     ... ## Next Level'''
     32     >>> md = markdown.markdown(text, ['headerid(level=3)'])
     33     >>> md
     34     u'<h3 id="some_header">Some Header</h3>\\n<h4 id="next_level">Next Level</h4>'
     35 
     36 Turn off auto generated IDs:
     37 
     38     >>> text = '''
     39     ... # Some Header
     40     ... # Header with ID # { #foo }'''
     41     >>> md = markdown.markdown(text, ['headerid(forceid=False)'])
     42     >>> md
     43     u'<h1>Some Header</h1>\\n<h1 id="foo">Header with ID</h1>'
     44 
     45 Use with MetaData extension:
     46 
     47     >>> text = '''header_level: 2
     48     ... header_forceid: Off
     49     ...
     50     ... # A Header'''
     51     >>> md = markdown.markdown(text, ['headerid', 'meta'])
     52     >>> md
     53     u'<h2>A Header</h2>'
     54 
     55 Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
     56 
Project website: <http://www.freewisdom.org/projects/python-markdown/HeaderId>
     58 Contact: markdown@freewisdom.org
     59 
     60 License: BSD (see ../docs/LICENSE for details) 
     61 
     62 Dependencies:
     63 * [Python 2.3+](http://python.org)
     64 * [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
     65 
     66 """
     67 
     68 import markdown
     69 from markdown import etree
     70 import re
     71 from string import ascii_lowercase, digits, punctuation
     72 
     73 ID_CHARS = ascii_lowercase + digits + '-_'
     74 IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
     75 
     76 
     77 class HeaderIdProcessor(markdown.blockprocessors.BlockProcessor):
     78     """ Replacement BlockProcessor for Header IDs. """
     79 
     80     # Detect a header at start of any line in block
     81     RE = re.compile(r"""(^|\n)
     82                         (?P<level>\#{1,6})  # group('level') = string of hashes
     83                         (?P<header>.*?)     # group('header') = Header text
     84                         \#*                 # optional closing hashes
     85                         (?:[ \t]*\{[ \t]*\#(?P<id>[-_:a-zA-Z0-9]+)[ \t]*\})?
     86                         (\n|$)              #  ^^ group('id') = id attribute
     87                      """,
     88                      re.VERBOSE)
     89 
     90     IDs = []
     91 
     92     def test(self, parent, block):
     93         return bool(self.RE.search(block))
     94 
     95     def run(self, parent, blocks):
     96         block = blocks.pop(0)
     97         m = self.RE.search(block)
     98         if m:
     99             before = block[:m.start()] # All lines before header
    100             after = block[m.end():]    # All lines after header
    101             if before:
    102                 # As the header was not the first line of the block and the
    103                 # lines before the header must be parsed first,
    104                 # recursively parse this lines as a block.
    105                 self.parser.parseBlocks(parent, [before])
    106             # Create header using named groups from RE
    107             start_level, force_id = self._get_meta()
    108             level = len(m.group('level')) + start_level
    109             if level > 6: 
    110                 level = 6
    111             h = markdown.etree.SubElement(parent, 'h%d' % level)
    112             h.text = m.group('header').strip()
    113             if m.group('id'):
    114                 h.set('id', self._unique_id(m.group('id')))
    115             elif force_id:
    116                 h.set('id', self._create_id(m.group('header').strip()))
    117             if after:
    118                 # Insert remaining lines as first block for future parsing.
    119                 blocks.insert(0, after)
    120         else:
    121             # This should never happen, but just in case...
    122             message(CRITICAL, "We've got a problem header!")
    123 
    124     def _get_meta(self):
    125         """ Return meta data suported by this ext as a tuple """
    126         level = int(self.config['level'][0]) - 1
    127         force = self._str2bool(self.config['forceid'][0])
    128         if hasattr(self.md, 'Meta'):
    129             if self.md.Meta.has_key('header_level'):
    130                 level = int(self.md.Meta['header_level'][0]) - 1
    131             if self.md.Meta.has_key('header_forceid'): 
    132                 force = self._str2bool(self.md.Meta['header_forceid'][0])
    133         return level, force
    134 
    135     def _str2bool(self, s, default=False):
    136         """ Convert a string to a booleen value. """
    137         s = str(s)
    138         if s.lower() in ['0', 'f', 'false', 'off', 'no', 'n']:
    139             return False
    140         elif s.lower() in ['1', 't', 'true', 'on', 'yes', 'y']:
    141             return True
    142         return default
    143 
    144     def _unique_id(self, id):
    145         """ Ensure ID is unique. Append '_1', '_2'... if not """
    146         while id in self.IDs:
    147             m = IDCOUNT_RE.match(id)
    148             if m:
    149                 id = '%s_%d'% (m.group(1), int(m.group(2))+1)
    150             else:
    151                 id = '%s_%d'% (id, 1)
    152         self.IDs.append(id)
    153         return id
    154 
    155     def _create_id(self, header):
    156         """ Return ID from Header text. """
    157         h = ''
    158         for c in header.lower().replace(' ', '_'):
    159             if c in ID_CHARS:
    160                 h += c
    161             elif c not in punctuation:
    162                 h += '+'
    163         return self._unique_id(h)
    164 
    165 
    166 class HeaderIdExtension (markdown.Extension):
    167     def __init__(self, configs):
    168         # set defaults
    169         self.config = {
    170                 'level' : ['1', 'Base level for headers.'],
    171                 'forceid' : ['True', 'Force all headers to have an id.']
    172             }
    173 
    174         for key, value in configs:
    175             self.setConfig(key, value)
    176 
    177     def extendMarkdown(self, md, md_globals):
    178         md.registerExtension(self)
    179         self.processor = HeaderIdProcessor(md.parser)
    180         self.processor.md = md
    181         self.processor.config = self.config
    182         # Replace existing hasheader in place.
    183         md.parser.blockprocessors['hashheader'] = self.processor
    184 
    185     def reset(self):
    186         self.processor.IDs = []
    187 
    188 
    189 def makeExtension(configs=None):
    190     return HeaderIdExtension(configs=configs)
    191 
    192 if __name__ == "__main__":
    193     import doctest
    194     doctest.testmod()
    195 
    196