Home | History | Annotate | Download | only in extensions
      1 # markdown is released under the BSD license
      2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
      3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
      4 # Copyright 2004 Manfred Stienstra (the original version)
      5 # 
      6 # All rights reserved.
      7 # 
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are met:
     10 # 
     11 # *   Redistributions of source code must retain the above copyright
     12 #     notice, this list of conditions and the following disclaimer.
     13 # *   Redistributions in binary form must reproduce the above copyright
     14 #     notice, this list of conditions and the following disclaimer in the
     15 #     documentation and/or other materials provided with the distribution.
     16 # *   Neither the name of the <organization> nor the
     17 #     names of its contributors may be used to endorse or promote products
     18 #     derived from this software without specific prior written permission.
     19 # 
     20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
     21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
     24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30 # POSSIBILITY OF SUCH DAMAGE.
     31 
     32 
     33 """
     34 Attribute List Extension for Python-Markdown
     35 ============================================
     36 
     37 Adds attribute list syntax. Inspired by 
     38 [maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
     39 feature of the same name.
     40 
     41 Copyright 2011 [Waylan Limberg](http://achinghead.com/).
     42 
     43 Contact: markdown@freewisdom.org
     44 
     45 License: BSD (see ../LICENSE.md for details) 
     46 
     47 Dependencies:
     48 * [Python 2.4+](http://python.org)
     49 * [Markdown 2.1+](http://packages.python.org/Markdown/)
     50 
     51 """
     52 
     53 from __future__ import absolute_import
     54 from __future__ import unicode_literals
     55 from . import Extension
     56 from ..treeprocessors import Treeprocessor
     57 from ..util import isBlockLevel
     58 import re
     59 
     60 try:
     61     Scanner = re.Scanner
     62 except AttributeError:
     63     # must be on Python 2.4
     64     from sre import Scanner
     65 
     66 def _handle_double_quote(s, t):
     67     k, v = t.split('=')
     68     return k, v.strip('"')
     69 
     70 def _handle_single_quote(s, t):
     71     k, v = t.split('=')
     72     return k, v.strip("'")
     73 
     74 def _handle_key_value(s, t): 
     75     return t.split('=')
     76 
     77 def _handle_word(s, t):
     78     if t.startswith('.'):
     79         return '.', t[1:]
     80     if t.startswith('#'):
     81         return 'id', t[1:]
     82     return t, t
     83 
# Tokenizer for the text inside an attribute list. The scanner tries the
# patterns in the order listed, so the quoted forms come before the bare
# key=value form, and all of them before the catch-all word rule.
_scanner = Scanner([
    # key="value" -- the value may contain spaces
    (r'[^ ]+=".*?"', _handle_double_quote),
    # key='value' -- the value may contain spaces
    (r"[^ ]+='.*?'", _handle_single_quote),
    # key=value -- the value runs up to the next space
    (r'[^ ]+=[^ ]*', _handle_key_value),
    # .class, #id, or any other bare word
    (r'[^ ]+', _handle_word),
    # a single space separates tokens; the None action emits no token
    (r' ', None)
])
     91 
     92 def get_attrs(str):
     93     """ Parse attribute list and return a list of attribute tuples. """
     94     return _scanner.scan(str)[0]
     95 
     96 def isheader(elem):
     97     return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
     98 
     99 class AttrListTreeprocessor(Treeprocessor):
    100     
    101     BASE_RE = r'\{\:?([^\}]*)\}'
    102     HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE)
    103     BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    104     INLINE_RE = re.compile(r'^%s' % BASE_RE)
    105     NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d'
    106                          r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef'
    107                          r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd'
    108                          r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
    109 
    110     def run(self, doc):
    111         for elem in doc.getiterator():
    112             if isBlockLevel(elem.tag):
    113                 # Block level: check for attrs on last line of text
    114                 RE = self.BLOCK_RE
    115                 if isheader(elem):
    116                     # header: check for attrs at end of line
    117                     RE = self.HEADER_RE
    118                 if len(elem) and elem[-1].tail:
    119                     # has children. Get from tail of last child
    120                     m = RE.search(elem[-1].tail)
    121                     if m:
    122                         self.assign_attrs(elem, m.group(1))
    123                         elem[-1].tail = elem[-1].tail[:m.start()]
    124                         if isheader(elem):
    125                             # clean up trailing #s
    126                             elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
    127                 elif elem.text:
    128                     # no children. Get from text.
    129                     m = RE.search(elem.text)
    130                     if m:
    131                         self.assign_attrs(elem, m.group(1))
    132                         elem.text = elem.text[:m.start()]
    133                         if isheader(elem):
    134                             # clean up trailing #s
    135                             elem.text = elem.text.rstrip('#').rstrip()
    136             else:
    137                 # inline: check for attrs at start of tail
    138                 if elem.tail:
    139                     m = self.INLINE_RE.match(elem.tail)
    140                     if m:
    141                         self.assign_attrs(elem, m.group(1))
    142                         elem.tail = elem.tail[m.end():]
    143 
    144     def assign_attrs(self, elem, attrs):
    145         """ Assign attrs to element. """
    146         for k, v in get_attrs(attrs):
    147             if k == '.':
    148                 # add to class
    149                 cls = elem.get('class')
    150                 if cls:
    151                     elem.set('class', '%s %s' % (cls, v))
    152                 else:
    153                     elem.set('class', v)
    154             else:
    155                 # assign attr k with v
    156                 elem.set(self.sanitize_name(k), v)
    157 
    158     def sanitize_name(self, name):
    159         """
    160         Sanitize name as 'an XML Name, minus the ":"'.
    161         See http://www.w3.org/TR/REC-xml-names/#NT-NCName
    162         """
    163         return self.NAME_RE.sub('_', name)
    164 
    165 
    166 class AttrListExtension(Extension):
    167     def extendMarkdown(self, md, md_globals):
    168         md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>prettify')
    169 
    170 
    171 def makeExtension(configs={}):
    172     return AttrListExtension(configs=configs)
    173