Home | History | Annotate | Download | only in markdown
      1 # markdown is released under the BSD license
      2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
      3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
      4 # Copyright 2004 Manfred Stienstra (the original version)
      5 # 
      6 # All rights reserved.
      7 # 
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are met:
     10 # 
     11 # *   Redistributions of source code must retain the above copyright
     12 #     notice, this list of conditions and the following disclaimer.
     13 # *   Redistributions in binary form must reproduce the above copyright
     14 #     notice, this list of conditions and the following disclaimer in the
     15 #     documentation and/or other materials provided with the distribution.
     16 # *   Neither the name of the <organization> nor the
     17 #     names of its contributors may be used to endorse or promote products
     18 #     derived from this software without specific prior written permission.
     19 # 
     20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
     21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
     24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30 # POSSIBILITY OF SUCH DAMAGE.
     31 
     32 
     33 from __future__ import unicode_literals
     34 from __future__ import absolute_import
     35 from . import util
     36 from . import odict
     37 from . import inlinepatterns
     38 
     39 
     40 def build_treeprocessors(md_instance, **kwargs):
     41     """ Build the default treeprocessors for Markdown. """
     42     treeprocessors = odict.OrderedDict()
     43     treeprocessors["inline"] = InlineProcessor(md_instance)
     44     treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
     45     return treeprocessors
     46 
     47 
     48 def isString(s):
     49     """ Check if it's string """
     50     if not isinstance(s, util.AtomicString):
     51         return isinstance(s, util.string_type)
     52     return False
     53 
     54 
     55 class Treeprocessor(util.Processor):
     56     """
     57     Treeprocessors are run on the ElementTree object before serialization.
     58 
     59     Each Treeprocessor implements a "run" method that takes a pointer to an
     60     ElementTree, modifies it as necessary and returns an ElementTree
     61     object.
     62 
     63     Treeprocessors must extend markdown.Treeprocessor.
     64 
     65     """
     66     def run(self, root):
     67         """
     68         Subclasses of Treeprocessor should implement a `run` method, which
     69         takes a root ElementTree. This method can return another ElementTree 
     70         object, and the existing root ElementTree will be replaced, or it can 
     71         modify the current tree and return None.
     72         """
     73         pass
     74 
     75 
     76 class InlineProcessor(Treeprocessor):
     77     """
     78     A Treeprocessor that traverses a tree, applying inline patterns.
     79     """
     80 
     81     def __init__(self, md):
     82         self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
     83         self.__placeholder_suffix = util.ETX
     84         self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
     85                                       + len(self.__placeholder_suffix)
     86         self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
     87         self.markdown = md
     88 
     89     def __makePlaceholder(self, type):
     90         """ Generate a placeholder """
     91         id = "%04d" % len(self.stashed_nodes)
     92         hash = util.INLINE_PLACEHOLDER % id
     93         return hash, id
     94 
     95     def __findPlaceholder(self, data, index):
     96         """
     97         Extract id from data string, start from index
     98 
     99         Keyword arguments:
    100 
    101         * data: string
    102         * index: index, from which we start search
    103 
    104         Returns: placeholder id and string index, after the found placeholder.
    105         
    106         """
    107         m = self.__placeholder_re.search(data, index)
    108         if m:
    109             return m.group(1), m.end()
    110         else:
    111             return None, index + 1
    112 
    113     def __stashNode(self, node, type):
    114         """ Add node to stash """
    115         placeholder, id = self.__makePlaceholder(type)
    116         self.stashed_nodes[id] = node
    117         return placeholder
    118 
    119     def __handleInline(self, data, patternIndex=0):
    120         """
    121         Process string with inline patterns and replace it
    122         with placeholders
    123 
    124         Keyword arguments:
    125 
    126         * data: A line of Markdown text
    127         * patternIndex: The index of the inlinePattern to start with
    128 
    129         Returns: String with placeholders.
    130 
    131         """
    132         if not isinstance(data, util.AtomicString):
    133             startIndex = 0
    134             while patternIndex < len(self.markdown.inlinePatterns):
    135                 data, matched, startIndex = self.__applyPattern(
    136                     self.markdown.inlinePatterns.value_for_index(patternIndex),
    137                     data, patternIndex, startIndex)
    138                 if not matched:
    139                     patternIndex += 1
    140         return data
    141 
    142     def __processElementText(self, node, subnode, isText=True):
    143         """
    144         Process placeholders in Element.text or Element.tail
    145         of Elements popped from self.stashed_nodes.
    146 
    147         Keywords arguments:
    148 
    149         * node: parent node
    150         * subnode: processing node
    151         * isText: bool variable, True - it's text, False - it's tail
    152 
    153         Returns: None
    154 
    155         """
    156         if isText:
    157             text = subnode.text
    158             subnode.text = None
    159         else:
    160             text = subnode.tail
    161             subnode.tail = None
    162 
    163         childResult = self.__processPlaceholders(text, subnode)
    164 
    165         if not isText and node is not subnode:
    166             pos = node.getchildren().index(subnode)
    167             node.remove(subnode)
    168         else:
    169             pos = 0
    170 
    171         childResult.reverse()
    172         for newChild in childResult:
    173             node.insert(pos, newChild)
    174 
    175     def __processPlaceholders(self, data, parent):
    176         """
    177         Process string with placeholders and generate ElementTree tree.
    178 
    179         Keyword arguments:
    180 
    181         * data: string with placeholders instead of ElementTree elements.
    182         * parent: Element, which contains processing inline data
    183 
    184         Returns: list with ElementTree elements with applied inline patterns.
    185         
    186         """
    187         def linkText(text):
    188             if text:
    189                 if result:
    190                     if result[-1].tail:
    191                         result[-1].tail += text
    192                     else:
    193                         result[-1].tail = text
    194                 else:
    195                     if parent.text:
    196                         parent.text += text
    197                     else:
    198                         parent.text = text
    199         result = []
    200         strartIndex = 0
    201         while data:
    202             index = data.find(self.__placeholder_prefix, strartIndex)
    203             if index != -1:
    204                 id, phEndIndex = self.__findPlaceholder(data, index)
    205 
    206                 if id in self.stashed_nodes:
    207                     node = self.stashed_nodes.get(id)
    208 
    209                     if index > 0:
    210                         text = data[strartIndex:index]
    211                         linkText(text)
    212 
    213                     if not isString(node): # it's Element
    214                         for child in [node] + node.getchildren():
    215                             if child.tail:
    216                                 if child.tail.strip():
    217                                     self.__processElementText(node, child,False)
    218                             if child.text:
    219                                 if child.text.strip():
    220                                     self.__processElementText(child, child)
    221                     else: # it's just a string
    222                         linkText(node)
    223                         strartIndex = phEndIndex
    224                         continue
    225 
    226                     strartIndex = phEndIndex
    227                     result.append(node)
    228 
    229                 else: # wrong placeholder
    230                     end = index + len(self.__placeholder_prefix)
    231                     linkText(data[strartIndex:end])
    232                     strartIndex = end
    233             else:
    234                 text = data[strartIndex:]
    235                 if isinstance(data, util.AtomicString):
    236                     # We don't want to loose the AtomicString
    237                     text = util.AtomicString(text)
    238                 linkText(text)
    239                 data = ""
    240 
    241         return result
    242 
    243     def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
    244         """
    245         Check if the line fits the pattern, create the necessary
    246         elements, add it to stashed_nodes.
    247 
    248         Keyword arguments:
    249 
    250         * data: the text to be processed
    251         * pattern: the pattern to be checked
    252         * patternIndex: index of current pattern
    253         * startIndex: string index, from which we start searching
    254 
    255         Returns: String with placeholders instead of ElementTree elements.
    256 
    257         """
    258         match = pattern.getCompiledRegExp().match(data[startIndex:])
    259         leftData = data[:startIndex]
    260 
    261         if not match:
    262             return data, False, 0
    263 
    264         node = pattern.handleMatch(match)
    265 
    266         if node is None:
    267             return data, True, len(leftData)+match.span(len(match.groups()))[0]
    268 
    269         if not isString(node):
    270             if not isinstance(node.text, util.AtomicString):
    271                 # We need to process current node too
    272                 for child in [node] + node.getchildren():
    273                     if not isString(node):
    274                         if child.text: 
    275                             child.text = self.__handleInline(child.text,
    276                                                             patternIndex + 1)
    277                         if child.tail:
    278                             child.tail = self.__handleInline(child.tail,
    279                                                             patternIndex)
    280 
    281         placeholder = self.__stashNode(node, pattern.type())
    282 
    283         return "%s%s%s%s" % (leftData,
    284                              match.group(1),
    285                              placeholder, match.groups()[-1]), True, 0
    286 
    287     def run(self, tree):
    288         """Apply inline patterns to a parsed Markdown tree.
    289 
    290         Iterate over ElementTree, find elements with inline tag, apply inline
    291         patterns and append newly created Elements to tree.  If you don't
    292         want to process your data with inline paterns, instead of normal string,
    293         use subclass AtomicString:
    294 
    295             node.text = markdown.AtomicString("This will not be processed.")
    296 
    297         Arguments:
    298 
    299         * tree: ElementTree object, representing Markdown tree.
    300 
    301         Returns: ElementTree object with applied inline patterns.
    302 
    303         """
    304         self.stashed_nodes = {}
    305 
    306         stack = [tree]
    307 
    308         while stack:
    309             currElement = stack.pop()
    310             insertQueue = []
    311             for child in currElement.getchildren():
    312                 if child.text and not isinstance(child.text, util.AtomicString):
    313                     text = child.text
    314                     child.text = None
    315                     lst = self.__processPlaceholders(self.__handleInline(
    316                                                     text), child)
    317                     stack += lst
    318                     insertQueue.append((child, lst))
    319                 if child.tail:
    320                     tail = self.__handleInline(child.tail)
    321                     dumby = util.etree.Element('d')
    322                     tailResult = self.__processPlaceholders(tail, dumby)
    323                     if dumby.text:
    324                         child.tail = dumby.text
    325                     else:
    326                         child.tail = None
    327                     pos = currElement.getchildren().index(child) + 1
    328                     tailResult.reverse()
    329                     for newChild in tailResult:
    330                         currElement.insert(pos, newChild)
    331                 if child.getchildren():
    332                     stack.append(child)
    333 
    334             for element, lst in insertQueue:
    335                 if self.markdown.enable_attributes:
    336                     if element.text and isString(element.text):
    337                         element.text = \
    338                             inlinepatterns.handleAttributes(element.text, 
    339                                                                     element)
    340                 i = 0
    341                 for newChild in lst:
    342                     if self.markdown.enable_attributes:
    343                         # Processing attributes
    344                         if newChild.tail and isString(newChild.tail):
    345                             newChild.tail = \
    346                                 inlinepatterns.handleAttributes(newChild.tail,
    347                                                                     element)
    348                         if newChild.text and isString(newChild.text):
    349                             newChild.text = \
    350                                 inlinepatterns.handleAttributes(newChild.text,
    351                                                                     newChild)
    352                     element.insert(i, newChild)
    353                     i += 1
    354         return tree
    355 
    356 
    357 class PrettifyTreeprocessor(Treeprocessor):
    358     """ Add linebreaks to the html document. """
    359 
    360     def _prettifyETree(self, elem):
    361         """ Recursively add linebreaks to ElementTree children. """
    362 
    363         i = "\n"
    364         if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
    365             if (not elem.text or not elem.text.strip()) \
    366                     and len(elem) and util.isBlockLevel(elem[0].tag):
    367                 elem.text = i
    368             for e in elem:
    369                 if util.isBlockLevel(e.tag):
    370                     self._prettifyETree(e)
    371             if not elem.tail or not elem.tail.strip():
    372                 elem.tail = i
    373         if not elem.tail or not elem.tail.strip():
    374             elem.tail = i
    375 
    376     def run(self, root):
    377         """ Add linebreaks to ElementTree root object. """
    378 
    379         self._prettifyETree(root)
    380         # Do <br />'s seperately as they are often in the middle of
    381         # inline content and missed by _prettifyETree.
    382         brs = root.getiterator('br')
    383         for br in brs:
    384             if not br.tail or not br.tail.strip():
    385                 br.tail = '\n'
    386             else:
    387                 br.tail = '\n%s' % br.tail
    388         # Clean up extra empty lines at end of code blocks.
    389         pres = root.getiterator('pre')
    390         for pre in pres:
    391             if len(pre) and pre[0].tag == 'code':
    392                 pre[0].text = pre[0].text.rstrip() + '\n'
    393