Home | History | Annotate | Download | only in markdown
      1 # markdown is released under the BSD license
      2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
      3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
      4 # Copyright 2004 Manfred Stienstra (the original version)
      5 # 
      6 # All rights reserved.
      7 # 
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are met:
     10 # 
     11 # *   Redistributions of source code must retain the above copyright
     12 #     notice, this list of conditions and the following disclaimer.
     13 # *   Redistributions in binary form must reproduce the above copyright
     14 #     notice, this list of conditions and the following disclaimer in the
     15 #     documentation and/or other materials provided with the distribution.
     16 # *   Neither the name of the <organization> nor the
     17 #     names of its contributors may be used to endorse or promote products
     18 #     derived from this software without specific prior written permission.
     19 # 
     20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
     21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
     24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30 # POSSIBILITY OF SUCH DAMAGE.
     31 
     32 
     33 # markdown/serializers.py
     34 #
     35 # Add x/html serialization to ElementTree
     36 # Taken from ElementTree 1.3 preview with slight modifications
     37 #
     38 # Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
     39 #
     40 # fredrik (at] pythonware.com
     41 # http://www.pythonware.com
     42 #
     43 # --------------------------------------------------------------------
     44 # The ElementTree toolkit is
     45 #
     46 # Copyright (c) 1999-2007 by Fredrik Lundh
     47 #
     48 # By obtaining, using, and/or copying this software and/or its
     49 # associated documentation, you agree that you have read, understood,
     50 # and will comply with the following terms and conditions:
     51 #
     52 # Permission to use, copy, modify, and distribute this software and
     53 # its associated documentation for any purpose and without fee is
     54 # hereby granted, provided that the above copyright notice appears in
     55 # all copies, and that both that copyright notice and this permission
     56 # notice appear in supporting documentation, and that the name of
     57 # Secret Labs AB or the author not be used in advertising or publicity
     58 # pertaining to distribution of the software without specific, written
     59 # prior permission.
     60 #
     61 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
     62 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
     63 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
     64 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
     65 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
     66 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
     67 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
     68 # OF THIS SOFTWARE.
     69 # --------------------------------------------------------------------
     70 
     71 
     72 from __future__ import absolute_import
     73 from __future__ import unicode_literals
     74 from . import util
     75 ElementTree = util.etree.ElementTree
     76 QName = util.etree.QName
     77 if hasattr(util.etree, 'test_comment'):
     78     Comment = util.etree.test_comment
     79 else:
     80     Comment = util.etree.Comment
     81 PI = util.etree.PI
     82 ProcessingInstruction = util.etree.ProcessingInstruction
     83 
     84 __all__ = ['to_html_string', 'to_xhtml_string']
     85 
     86 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
     87               "img", "input", "isindex", "link", "meta" "param")
     88 
     89 try:
     90     HTML_EMPTY = set(HTML_EMPTY)
     91 except NameError:
     92     pass
     93 
     94 _namespace_map = {
     95     # "well-known" namespace prefixes
     96     "http://www.w3.org/XML/1998/namespace": "xml",
     97     "http://www.w3.org/1999/xhtml": "html",
     98     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
     99     "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    100     # xml schema
    101     "http://www.w3.org/2001/XMLSchema": "xs",
    102     "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    103     # dublic core
    104     "http://purl.org/dc/elements/1.1/": "dc",
    105 }
    106 
    107 
    108 def _raise_serialization_error(text):
    109     raise TypeError(
    110         "cannot serialize %r (type %s)" % (text, type(text).__name__)
    111         )
    112 
    113 def _encode(text, encoding):
    114     try:
    115         return text.encode(encoding, "xmlcharrefreplace")
    116     except (TypeError, AttributeError):
    117         _raise_serialization_error(text)
    118 
    119 def _escape_cdata(text):
    120     # escape character data
    121     try:
    122         # it's worth avoiding do-nothing calls for strings that are
    123         # shorter than 500 character, or so.  assume that's, by far,
    124         # the most common case in most applications.
    125         if "&" in text:
    126             text = text.replace("&", "&amp;")
    127         if "<" in text:
    128             text = text.replace("<", "&lt;")
    129         if ">" in text:
    130             text = text.replace(">", "&gt;")
    131         return text
    132     except (TypeError, AttributeError):
    133         _raise_serialization_error(text)
    134 
    135 
    136 def _escape_attrib(text):
    137     # escape attribute value
    138     try:
    139         if "&" in text:
    140             text = text.replace("&", "&amp;")
    141         if "<" in text:
    142             text = text.replace("<", "&lt;")
    143         if ">" in text:
    144             text = text.replace(">", "&gt;")
    145         if "\"" in text:
    146             text = text.replace("\"", "&quot;")
    147         if "\n" in text:
    148             text = text.replace("\n", "&#10;")
    149         return text
    150     except (TypeError, AttributeError):
    151         _raise_serialization_error(text)
    152 
    153 def _escape_attrib_html(text):
    154     # escape attribute value
    155     try:
    156         if "&" in text:
    157             text = text.replace("&", "&amp;")
    158         if "<" in text:
    159             text = text.replace("<", "&lt;")
    160         if ">" in text:
    161             text = text.replace(">", "&gt;")
    162         if "\"" in text:
    163             text = text.replace("\"", "&quot;")
    164         return text
    165     except (TypeError, AttributeError):
    166         _raise_serialization_error(text)
    167 
    168 
    169 def _serialize_html(write, elem, qnames, namespaces, format):
    170     tag = elem.tag
    171     text = elem.text
    172     if tag is Comment:
    173         write("<!--%s-->" % _escape_cdata(text))
    174     elif tag is ProcessingInstruction:
    175         write("<?%s?>" % _escape_cdata(text))
    176     else:
    177         tag = qnames[tag]
    178         if tag is None:
    179             if text:
    180                 write(_escape_cdata(text))
    181             for e in elem:
    182                 _serialize_html(write, e, qnames, None, format)
    183         else:
    184             write("<" + tag)
    185             items = elem.items()
    186             if items or namespaces:
    187                 items.sort() # lexical order
    188                 for k, v in items:
    189                     if isinstance(k, QName):
    190                         k = k.text
    191                     if isinstance(v, QName):
    192                         v = qnames[v.text]
    193                     else:
    194                         v = _escape_attrib_html(v)
    195                     if qnames[k] == v and format == 'html':
    196                         # handle boolean attributes
    197                         write(" %s" % v)
    198                     else:
    199                         write(" %s=\"%s\"" % (qnames[k], v))
    200                 if namespaces:
    201                     items = namespaces.items()
    202                     items.sort(key=lambda x: x[1]) # sort on prefix
    203                     for v, k in items:
    204                         if k:
    205                             k = ":" + k
    206                         write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
    207             if format == "xhtml" and tag in HTML_EMPTY:
    208                 write(" />")
    209             else:
    210                 write(">")
    211                 tag = tag.lower()
    212                 if text:
    213                     if tag == "script" or tag == "style":
    214                         write(text)
    215                     else:
    216                         write(_escape_cdata(text))
    217                 for e in elem:
    218                     _serialize_html(write, e, qnames, None, format)
    219                 if tag not in HTML_EMPTY:
    220                     write("</" + tag + ">")
    221     if elem.tail:
    222         write(_escape_cdata(elem.tail))
    223 
    224 def _write_html(root,
    225                 encoding=None,
    226                 default_namespace=None,
    227                 format="html"):
    228     assert root is not None
    229     data = []
    230     write = data.append
    231     qnames, namespaces = _namespaces(root, default_namespace)
    232     _serialize_html(write, root, qnames, namespaces, format)
    233     if encoding is None:
    234         return "".join(data)
    235     else:
    236         return _encode("".join(data))
    237 
    238 
    239 # --------------------------------------------------------------------
    240 # serialization support
    241 
    242 def _namespaces(elem, default_namespace=None):
    243     # identify namespaces used in this tree
    244 
    245     # maps qnames to *encoded* prefix:local names
    246     qnames = {None: None}
    247 
    248     # maps uri:s to prefixes
    249     namespaces = {}
    250     if default_namespace:
    251         namespaces[default_namespace] = ""
    252 
    253     def add_qname(qname):
    254         # calculate serialized qname representation
    255         try:
    256             if qname[:1] == "{":
    257                 uri, tag = qname[1:].split("}", 1)
    258                 prefix = namespaces.get(uri)
    259                 if prefix is None:
    260                     prefix = _namespace_map.get(uri)
    261                     if prefix is None:
    262                         prefix = "ns%d" % len(namespaces)
    263                     if prefix != "xml":
    264                         namespaces[uri] = prefix
    265                 if prefix:
    266                     qnames[qname] = "%s:%s" % (prefix, tag)
    267                 else:
    268                     qnames[qname] = tag # default element
    269             else:
    270                 if default_namespace:
    271                     raise ValueError(
    272                         "cannot use non-qualified names with "
    273                         "default_namespace option"
    274                         )
    275                 qnames[qname] = qname
    276         except TypeError:
    277             _raise_serialization_error(qname)
    278 
    279     # populate qname and namespaces table
    280     try:
    281         iterate = elem.iter
    282     except AttributeError:
    283         iterate = elem.getiterator # cET compatibility
    284     for elem in iterate():
    285         tag = elem.tag
    286         if isinstance(tag, QName) and tag.text not in qnames:
    287             add_qname(tag.text)
    288         elif isinstance(tag, util.string_type):
    289             if tag not in qnames:
    290                 add_qname(tag)
    291         elif tag is not None and tag is not Comment and tag is not PI:
    292             _raise_serialization_error(tag)
    293         for key, value in elem.items():
    294             if isinstance(key, QName):
    295                 key = key.text
    296             if key not in qnames:
    297                 add_qname(key)
    298             if isinstance(value, QName) and value.text not in qnames:
    299                 add_qname(value.text)
    300         text = elem.text
    301         if isinstance(text, QName) and text.text not in qnames:
    302             add_qname(text.text)
    303     return qnames, namespaces
    304 
    305 def to_html_string(element):
    306     return _write_html(ElementTree(element).getroot(), format="html")
    307 
    308 def to_xhtml_string(element):
    309     return _write_html(ElementTree(element).getroot(), format="xhtml")
    310