Home | History | Annotate | Download | only in markdown
      1 # markdown is released under the BSD license
      2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
      3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
      4 # Copyright 2004 Manfred Stienstra (the original version)
      5 # 
      6 # All rights reserved.
      7 # 
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are met:
     10 # 
     11 # *   Redistributions of source code must retain the above copyright
     12 #     notice, this list of conditions and the following disclaimer.
     13 # *   Redistributions in binary form must reproduce the above copyright
     14 #     notice, this list of conditions and the following disclaimer in the
     15 #     documentation and/or other materials provided with the distribution.
     16 # *   Neither the name of the <organization> nor the
     17 #     names of its contributors may be used to endorse or promote products
     18 #     derived from this software without specific prior written permission.
     19 # 
     20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
     21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
     24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30 # POSSIBILITY OF SUCH DAMAGE.
     31 
     32 
     33 """
     34 POST-PROCESSORS
     35 =============================================================================
     36 
     37 Markdown also allows post-processors, which are similar to preprocessors in
     38 that they need to implement a "run" method. However, they are run after core
     39 processing.
     40 
     41 """
     42 
     43 from __future__ import absolute_import
     44 from __future__ import unicode_literals
     45 from . import util
     46 from . import odict
     47 import re
     48 
     49 
     50 def build_postprocessors(md_instance, **kwargs):
     51     """ Build the default postprocessors for Markdown. """
     52     postprocessors = odict.OrderedDict()
     53     postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance)
     54     postprocessors["amp_substitute"] = AndSubstitutePostprocessor()
     55     postprocessors["unescape"] = UnescapePostprocessor()
     56     return postprocessors
     57 
     58 
     59 class Postprocessor(util.Processor):
     60     """
     61     Postprocessors are run after the ElementTree it converted back into text.
     62 
     63     Each Postprocessor implements a "run" method that takes a pointer to a
     64     text string, modifies it as necessary and returns a text string.
     65 
     66     Postprocessors must extend markdown.Postprocessor.
     67 
     68     """
     69 
     70     def run(self, text):
     71         """
     72         Subclasses of Postprocessor should implement a `run` method, which
     73         takes the html document as a single text string and returns a
     74         (possibly modified) string.
     75 
     76         """
     77         pass
     78 
     79 
     80 class RawHtmlPostprocessor(Postprocessor):
     81     """ Restore raw html to the document. """
     82 
     83     def run(self, text):
     84         """ Iterate over html stash and restore "safe" html. """
     85         for i in range(self.markdown.htmlStash.html_counter):
     86             html, safe  = self.markdown.htmlStash.rawHtmlBlocks[i]
     87             if self.markdown.safeMode and not safe:
     88                 if str(self.markdown.safeMode).lower() == 'escape':
     89                     html = self.escape(html)
     90                 elif str(self.markdown.safeMode).lower() == 'remove':
     91                     html = ''
     92                 else:
     93                     html = self.markdown.html_replacement_text
     94             if self.isblocklevel(html) and (safe or not self.markdown.safeMode):
     95                 text = text.replace("<p>%s</p>" % 
     96                             (self.markdown.htmlStash.get_placeholder(i)),
     97                             html + "\n")
     98             text =  text.replace(self.markdown.htmlStash.get_placeholder(i), 
     99                                  html)
    100         return text
    101 
    102     def escape(self, html):
    103         """ Basic html escaping """
    104         html = html.replace('&', '&amp;')
    105         html = html.replace('<', '&lt;')
    106         html = html.replace('>', '&gt;')
    107         return html.replace('"', '&quot;')
    108 
    109     def isblocklevel(self, html):
    110         m = re.match(r'^\<\/?([^ >]+)', html)
    111         if m:
    112             if m.group(1)[0] in ('!', '?', '@', '%'):
    113                 # Comment, php etc...
    114                 return True
    115             return util.isBlockLevel(m.group(1))
    116         return False
    117 
    118 
    119 class AndSubstitutePostprocessor(Postprocessor):
    120     """ Restore valid entities """
    121 
    122     def run(self, text):
    123         text =  text.replace(util.AMP_SUBSTITUTE, "&")
    124         return text
    125 
    126 
    127 class UnescapePostprocessor(Postprocessor):
    128     """ Restore escaped chars """
    129 
    130     RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX))
    131 
    132     def unescape(self, m):
    133         return util.int2str(int(m.group(1)))
    134 
    135     def run(self, text):
    136         return self.RE.sub(self.unescape, text)
    137