# -*- coding: utf-8 -*-
# markdown is released under the BSD license
# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the <organization> nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


from __future__ import unicode_literals
import re
import sys


"""
Python 3 Stuff
=============================================================================
"""
# ">= 3" rather than "== 3" so that a later major version does not silently
# fall back to the Python 2 names (`basestring`, `unicode`, `unichr`).
PY3 = sys.version_info[0] >= 3

if PY3:
    string_type = str
    text_type = str
    int2str = chr
else:
    string_type = basestring
    text_type = unicode
    int2str = unichr


"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""

BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
                                  "|script|noscript|form|fieldset|iframe|math"
                                  "|hr|hr/|style|li|dt|dd|thead|tbody"
                                  "|tr|th|td|section|footer|header|group|figure"
                                  "|figcaption|aside|article|canvas|output"
                                  "|progress|video)$", re.IGNORECASE)
# Placeholders
STX = '\u0002'  # Use STX ("Start of text") for start-of-placeholder
ETX = '\u0003'  # Use ETX ("End of text") for end-of-placeholder
INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
AMP_SUBSTITUTE = STX+"amp"+ETX

"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""

RTL_BIDI_RANGES = (
    ('\u0590', '\u07FF'),
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u2D30', '\u2D7F'),  # Tifinagh
)


def _version_tuple(version):
    """Return the numeric components of a version string as a tuple of ints.

    Version strings must not be compared as text: "1.10" < "1.5" is true
    lexicographically although 1.10 is the newer release. Comparing the
    resulting tuples, e.g. ``_version_tuple("1.10") > (1, 5)``, orders
    versions numerically.
    """
    return tuple(int(number) for number in re.findall(r'[0-9]+', version))


# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`). Do not import it by yourself.

try:  # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    if _version_tuple(etree.VERSION) < (1, 0, 5):
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):
    # Use the Python implementation of ElementTree?
    import xml.etree.ElementTree as etree
    if _version_tuple(etree.VERSION) < (1, 1):
        raise RuntimeError("ElementTree version 1.1 or higher is required")


"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""


def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    Returns the (truthy) match object of `BLOCK_LEVEL_ELEMENTS` when `tag`
    is a string naming a block level element, `None` when it is a string
    that does not match, and `False` when `tag` is not a string at all.
    """
    if isinstance(tag, string_type):
        return BLOCK_LEVEL_ELEMENTS.match(tag)
    # Some ElementTree tags are not strings, so return False.
    return False


"""
MISC AUXILIARY CLASSES
=============================================================================
"""


class AtomicString(text_type):
    """A string which should not be further processed."""
    pass


class Processor(object):
    """Base class that keeps a reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Store `markdown_instance` as `self.markdown` when one is given.

        The attribute is left unset when the argument is omitted (or is
        otherwise falsy).
        """
        if markdown_instance:
            self.markdown = markdown_instance


class HtmlStash(object):
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []

    def store(self, html, safe=False):
        """
        Saves an HTML segment for later reinsertion.  Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append((html, safe))
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """ Forget all stashed segments and restart the counter at zero. """
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key):
        """ Return the placeholder string for the integer index `key`. """
        return "%swzxhzdk:%d%s" % (STX, key, ETX)