Home | History | Annotate | Download | only in markdown
      1 # -*- coding: utf-8 -*-
      2 # markdown is released under the BSD license
      3 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
      4 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
      5 # Copyright 2004 Manfred Stienstra (the original version)
      6 # 
      7 # All rights reserved.
      8 # 
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions are met:
     11 # 
     12 # *   Redistributions of source code must retain the above copyright
     13 #     notice, this list of conditions and the following disclaimer.
     14 # *   Redistributions in binary form must reproduce the above copyright
     15 #     notice, this list of conditions and the following disclaimer in the
     16 #     documentation and/or other materials provided with the distribution.
     17 # *   Neither the name of the <organization> nor the
     18 #     names of its contributors may be used to endorse or promote products
     19 #     derived from this software without specific prior written permission.
     20 # 
     21 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
     22 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     23 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     24 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
     25 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     26 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     27 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     28 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     29 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     30 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     31 # POSSIBILITY OF SUCH DAMAGE.
     32 
     33 
     34 from __future__ import unicode_literals
     35 import re
     36 import sys
     37 
     38 
     39 """
     40 Python 3 Stuff
     41 =============================================================================
     42 """
     43 PY3 = sys.version_info[0] == 3
     44 
     45 if PY3:
     46     string_type = str
     47     text_type = str
     48     int2str = chr
     49 else:
     50     string_type = basestring
     51     text_type = unicode
     52     int2str = unichr
     53 
     54 
     55 """
     56 Constants you might want to modify
     57 -----------------------------------------------------------------------------
     58 """
     59 
     60 BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
     61                                   "|script|noscript|form|fieldset|iframe|math"
     62                                   "|hr|hr/|style|li|dt|dd|thead|tbody"
     63                                   "|tr|th|td|section|footer|header|group|figure"
     64                                   "|figcaption|aside|article|canvas|output"
     65                                   "|progress|video)$", re.IGNORECASE)
     66 # Placeholders
     67 STX = '\u0002'  # Use STX ("Start of text") for start-of-placeholder
     68 ETX = '\u0003'  # Use ETX ("End of text") for end-of-placeholder
     69 INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
     70 INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
     71 INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
     72 AMP_SUBSTITUTE = STX+"amp"+ETX
     73 
     74 """
     75 Constants you probably do not need to change
     76 -----------------------------------------------------------------------------
     77 """
     78 
     79 RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'),
     80                      # Hebrew (0590-05FF), Arabic (0600-06FF),
     81                      # Syriac (0700-074F), Arabic supplement (0750-077F),
     82                      # Thaana (0780-07BF), Nko (07C0-07FF).
     83                     ('\u2D30', '\u2D7F'), # Tifinagh
     84                     )
     85 
# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`).  Do not import ElementTree directly yourself.
     88 
# Bind the module-level name `etree` to an ElementTree implementation.
# The C implementation is tried first; if importing it fails, or its version
# check raises RuntimeError, the pure-Python implementation is used instead.
# NOTE(review): if VERSION is a string, the `<` checks below compare
# lexicographically rather than numerically (e.g. "1.10" < "1.2") -- confirm
# this is acceptable for the versions that can actually occur.
try: # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    if etree.VERSION < "1.0.5":
        # Caught by the except clause below, which triggers the fallback.
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):
    # Use the Python implementation of ElementTree?
    # Note: `etree.test_comment` is not assigned on this path.
    import xml.etree.ElementTree as etree
    if etree.VERSION < "1.1":
        # Not caught here: this error propagates out of the module import.
        raise RuntimeError("ElementTree version 1.1 or higher is required")
    101 
    102 
    103 """
    104 AUXILIARY GLOBAL FUNCTIONS
    105 =============================================================================
    106 """
    107 
    108 
    109 def isBlockLevel(tag):
    110     """Check if the tag is a block level HTML tag."""
    111     if isinstance(tag, string_type):
    112         return BLOCK_LEVEL_ELEMENTS.match(tag)
    113     # Some ElementTree tags are not strings, so return False.
    114     return False
    115 
    116 """
    117 MISC AUXILIARY CLASSES
    118 =============================================================================
    119 """
    120 
    121 class AtomicString(text_type):
    122     """A string which should not be further processed."""
    123     pass
    124 
    125 
    126 class Processor(object):
    127     def __init__(self, markdown_instance=None):
    128         if markdown_instance:
    129             self.markdown = markdown_instance
    130 
    131 
    132 class HtmlStash(object):
    133     """
    134     This class is used for stashing HTML objects that we extract
    135     in the beginning and replace with place-holders.
    136     """
    137 
    138     def __init__ (self):
    139         """ Create a HtmlStash. """
    140         self.html_counter = 0 # for counting inline html segments
    141         self.rawHtmlBlocks=[]
    142 
    143     def store(self, html, safe=False):
    144         """
    145         Saves an HTML segment for later reinsertion.  Returns a
    146         placeholder string that needs to be inserted into the
    147         document.
    148 
    149         Keyword arguments:
    150 
    151         * html: an html segment
    152         * safe: label an html segment as safe for safemode
    153 
    154         Returns : a placeholder string
    155 
    156         """
    157         self.rawHtmlBlocks.append((html, safe))
    158         placeholder = self.get_placeholder(self.html_counter)
    159         self.html_counter += 1
    160         return placeholder
    161 
    162     def reset(self):
    163         self.html_counter = 0
    164         self.rawHtmlBlocks = []
    165 
    166     def get_placeholder(self, key):
    167         return "%swzxhzdk:%d%s" % (STX, key, ETX)
    168 
    169