1 # markdown is released under the BSD license 2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) 3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) 4 # Copyright 2004 Manfred Stienstra (the original version) 5 # 6 # All rights reserved. 7 # 8 # Redistribution and use in source and binary forms, with or without 9 # modification, are permitted provided that the following conditions are met: 10 # 11 # * Redistributions of source code must retain the above copyright 12 # notice, this list of conditions and the following disclaimer. 13 # * Redistributions in binary form must reproduce the above copyright 14 # notice, this list of conditions and the following disclaimer in the 15 # documentation and/or other materials provided with the distribution. 16 # * Neither the name of the <organization> nor the 17 # names of its contributors may be used to endorse or promote products 18 # derived from this software without specific prior written permission. 19 # 20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY 21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT 24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 # POSSIBILITY OF SUCH DAMAGE. 
# markdown/serializers.py
#
# Add x/html serialization to ElementTree
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
#
# fredrik (at] pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------


from __future__ import absolute_import
from __future__ import unicode_literals
from . import util
ElementTree = util.etree.ElementTree
QName = util.etree.QName
if hasattr(util.etree, 'test_comment'):
    Comment = util.etree.test_comment
else:
    Comment = util.etree.Comment
PI = util.etree.PI
ProcessingInstruction = util.etree.ProcessingInstruction

__all__ = ['to_html_string', 'to_xhtml_string']

# Tags that are serialized without an end tag (and self-closed in XHTML).
# NOTE: every entry needs its own comma; adjacent string literals are
# silently concatenated ("meta" "param" would become "metaparam").
HTML_EMPTY = set([
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
])

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}


def _raise_serialization_error(text):
    """Raise ``TypeError`` reporting that ``text`` cannot be serialized."""
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
    )


def _encode(text, encoding):
    """Encode ``text`` with ``encoding``.

    Characters the codec cannot represent are replaced by XML character
    references.  Raises ``TypeError`` (via ``_raise_serialization_error``)
    when ``text`` cannot be encoded because it is not string-like.
    """
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_cdata(text):
    """Escape ``&``, ``<`` and ``>`` in character data.

    Raises ``TypeError`` when ``text`` is not string-like.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        # "&" must be handled first so the entities produced below are
        # not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_attrib(text):
    """Escape an attribute value, including double quotes and newlines.

    Newlines are written as the character reference ``&#10;``.
    Raises ``TypeError`` when ``text`` is not string-like.
    """
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_attrib_html(text):
    """Escape an HTML attribute value (like ``_escape_attrib`` but newlines
    are left untouched).

    Raises ``TypeError`` when ``text`` is not string-like.
    """
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _serialize_html(write, elem, qnames, namespaces, format):
    """Recursively write ``elem`` and its subtree as HTML or XHTML markup.

    ``write`` is called with each output fragment.  ``qnames`` maps tags
    and attribute names to their serialized form and ``namespaces`` maps
    namespace URIs to prefixes (both as returned by ``_namespaces``).
    Children are serialized with ``namespaces=None``, so ``xmlns``
    declarations are only written on the element this is first called for.
    ``format`` is ``"html"`` or ``"xhtml"``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # No tag of its own: emit only the text and the children.
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                # sorted() rather than .sort(): items() may be a dict
                # view, which has no sort() method.
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # sort on prefix
                    ns_items = sorted(namespaces.items(), key=lambda x: x[1])
                    for v, k in ns_items:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            if format == "xhtml" and tag in HTML_EMPTY:
                write(" />")
            else:
                write(">")
                tag = tag.lower()
                if text:
                    if tag == "script" or tag == "style":
                        # script/style content is written unescaped
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if tag not in HTML_EMPTY:
                    write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))


def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at ``root`` and return the markup.

    Returns a text string when ``encoding`` is ``None``; otherwise the
    result of encoding that string with ``encoding`` (unencodable
    characters become XML character references).
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    if encoding is None:
        return "".join(data)
    else:
        return _encode("".join(data), encoding)


# --------------------------------------------------------------------
# serialization support

def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Returns ``(qnames, namespaces)``: ``qnames`` maps each tag, attribute
    name and QName value found in the tree to its serialized
    ``prefix:local`` form; ``namespaces`` maps namespace URIs to prefixes.

    Raises ``ValueError`` if a non-qualified name is found while
    ``default_namespace`` is set, and ``TypeError`` for tags that cannot
    be serialized.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = qname
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility

    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            # A QName tag that is already registered is fine; it must not
            # fall through to the "cannot serialize" branch below.
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces


def to_html_string(element):
    """Return ``element`` and its subtree serialized as an HTML string."""
    return _write_html(ElementTree(element).getroot(), format="html")


def to_xhtml_string(element):
    """Return ``element`` and its subtree serialized as an XHTML string."""
    return _write_html(ElementTree(element).getroot(), format="xhtml")