Home | History | Annotate | Download | only in sax
      1 """\
      2 A library of useful helper classes to the SAX classes, for the
      3 convenience of application and driver writers.
      4 """
      5 
      6 import os, urlparse, urllib, types
      7 import handler
      8 import xmlreader
      9 
     10 try:
     11     _StringTypes = [types.StringType, types.UnicodeType]
     12 except AttributeError:
     13     _StringTypes = [types.StringType]
     14 
     15 # See whether the xmlcharrefreplace error handler is

     16 # supported

     17 try:
     18     from codecs import xmlcharrefreplace_errors
     19     _error_handling = "xmlcharrefreplace"
     20     del xmlcharrefreplace_errors
     21 except ImportError:
     22     _error_handling = "strict"
     23 
     24 def __dict_replace(s, d):
     25     """Replace substrings of a string using a dictionary."""
     26     for key, value in d.items():
     27         s = s.replace(key, value)
     28     return s
     29 
     30 def escape(data, entities={}):
     31     """Escape &, <, and > in a string of data.
     32 
     33     You can escape other strings of data by passing a dictionary as
     34     the optional entities parameter.  The keys and values must all be
     35     strings; each key will be replaced with its corresponding value.
     36     """
     37 
     38     # must do ampersand first

     39     data = data.replace("&", "&amp;")
     40     data = data.replace(">", "&gt;")
     41     data = data.replace("<", "&lt;")
     42     if entities:
     43         data = __dict_replace(data, entities)
     44     return data
     45 
     46 def unescape(data, entities={}):
     47     """Unescape &amp;, &lt;, and &gt; in a string of data.
     48 
     49     You can unescape other strings of data by passing a dictionary as
     50     the optional entities parameter.  The keys and values must all be
     51     strings; each key will be replaced with its corresponding value.
     52     """
     53     data = data.replace("&lt;", "<")
     54     data = data.replace("&gt;", ">")
     55     if entities:
     56         data = __dict_replace(data, entities)
     57     # must do ampersand last

     58     return data.replace("&amp;", "&")
     59 
     60 def quoteattr(data, entities={}):
     61     """Escape and quote an attribute value.
     62 
     63     Escape &, <, and > in a string of data, then quote it for use as
     64     an attribute value.  The \" character will be escaped as well, if
     65     necessary.
     66 
     67     You can escape other strings of data by passing a dictionary as
     68     the optional entities parameter.  The keys and values must all be
     69     strings; each key will be replaced with its corresponding value.
     70     """
     71     entities = entities.copy()
     72     entities.update({'\n': '&#10;', '\r': '&#13;', '\t':'&#9;'})
     73     data = escape(data, entities)
     74     if '"' in data:
     75         if "'" in data:
     76             data = '"%s"' % data.replace('"', "&quot;")
     77         else:
     78             data = "'%s'" % data
     79     else:
     80         data = '"%s"' % data
     81     return data
     82 
     83 
     84 class XMLGenerator(handler.ContentHandler):
     85 
     86     def __init__(self, out=None, encoding="iso-8859-1"):
     87         if out is None:
     88             import sys
     89             out = sys.stdout
     90         handler.ContentHandler.__init__(self)
     91         self._out = out
     92         self._ns_contexts = [{}] # contains uri -> prefix dicts

     93         self._current_context = self._ns_contexts[-1]
     94         self._undeclared_ns_maps = []
     95         self._encoding = encoding
     96 
     97     def _write(self, text):
     98         if isinstance(text, str):
     99             self._out.write(text)
    100         else:
    101             self._out.write(text.encode(self._encoding, _error_handling))
    102 
    103     def _qname(self, name):
    104         """Builds a qualified name from a (ns_url, localname) pair"""
    105         if name[0]:
    106             # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is

    107             # bound by definition to http://www.w3.org/XML/1998/namespace.  It

    108             # does not need to be declared and will not usually be found in

    109             # self._current_context.

    110             if 'http://www.w3.org/XML/1998/namespace' == name[0]:
    111                 return 'xml:' + name[1]
    112             # The name is in a non-empty namespace

    113             prefix = self._current_context[name[0]]
    114             if prefix:
    115                 # If it is not the default namespace, prepend the prefix

    116                 return prefix + ":" + name[1]
    117         # Return the unqualified name

    118         return name[1]
    119 
    120     # ContentHandler methods

    121 
    122     def startDocument(self):
    123         self._write('<?xml version="1.0" encoding="%s"?>\n' %
    124                         self._encoding)
    125 
    126     def startPrefixMapping(self, prefix, uri):
    127         self._ns_contexts.append(self._current_context.copy())
    128         self._current_context[uri] = prefix
    129         self._undeclared_ns_maps.append((prefix, uri))
    130 
    131     def endPrefixMapping(self, prefix):
    132         self._current_context = self._ns_contexts[-1]
    133         del self._ns_contexts[-1]
    134 
    135     def startElement(self, name, attrs):
    136         self._write('<' + name)
    137         for (name, value) in attrs.items():
    138             self._write(' %s=%s' % (name, quoteattr(value)))
    139         self._write('>')
    140 
    141     def endElement(self, name):
    142         self._write('</%s>' % name)
    143 
    144     def startElementNS(self, name, qname, attrs):
    145         self._write('<' + self._qname(name))
    146 
    147         for prefix, uri in self._undeclared_ns_maps:
    148             if prefix:
    149                 self._out.write(' xmlns:%s="%s"' % (prefix, uri))
    150             else:
    151                 self._out.write(' xmlns="%s"' % uri)
    152         self._undeclared_ns_maps = []
    153 
    154         for (name, value) in attrs.items():
    155             self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
    156         self._write('>')
    157 
    158     def endElementNS(self, name, qname):
    159         self._write('</%s>' % self._qname(name))
    160 
    161     def characters(self, content):
    162         self._write(escape(content))
    163 
    164     def ignorableWhitespace(self, content):
    165         self._write(content)
    166 
    167     def processingInstruction(self, target, data):
    168         self._write('<?%s %s?>' % (target, data))
    169 
    170 
    171 class XMLFilterBase(xmlreader.XMLReader):
    172     """This class is designed to sit between an XMLReader and the
    173     client application's event handlers.  By default, it does nothing
    174     but pass requests up to the reader and events on to the handlers
    175     unmodified, but subclasses can override specific methods to modify
    176     the event stream or the configuration requests as they pass
    177     through."""
    178 
    179     def __init__(self, parent = None):
    180         xmlreader.XMLReader.__init__(self)
    181         self._parent = parent
    182 
    183     # ErrorHandler methods

    184 
    185     def error(self, exception):
    186         self._err_handler.error(exception)
    187 
    188     def fatalError(self, exception):
    189         self._err_handler.fatalError(exception)
    190 
    191     def warning(self, exception):
    192         self._err_handler.warning(exception)
    193 
    194     # ContentHandler methods

    195 
    196     def setDocumentLocator(self, locator):
    197         self._cont_handler.setDocumentLocator(locator)
    198 
    199     def startDocument(self):
    200         self._cont_handler.startDocument()
    201 
    202     def endDocument(self):
    203         self._cont_handler.endDocument()
    204 
    205     def startPrefixMapping(self, prefix, uri):
    206         self._cont_handler.startPrefixMapping(prefix, uri)
    207 
    208     def endPrefixMapping(self, prefix):
    209         self._cont_handler.endPrefixMapping(prefix)
    210 
    211     def startElement(self, name, attrs):
    212         self._cont_handler.startElement(name, attrs)
    213 
    214     def endElement(self, name):
    215         self._cont_handler.endElement(name)
    216 
    217     def startElementNS(self, name, qname, attrs):
    218         self._cont_handler.startElementNS(name, qname, attrs)
    219 
    220     def endElementNS(self, name, qname):
    221         self._cont_handler.endElementNS(name, qname)
    222 
    223     def characters(self, content):
    224         self._cont_handler.characters(content)
    225 
    226     def ignorableWhitespace(self, chars):
    227         self._cont_handler.ignorableWhitespace(chars)
    228 
    229     def processingInstruction(self, target, data):
    230         self._cont_handler.processingInstruction(target, data)
    231 
    232     def skippedEntity(self, name):
    233         self._cont_handler.skippedEntity(name)
    234 
    235     # DTDHandler methods

    236 
    237     def notationDecl(self, name, publicId, systemId):
    238         self._dtd_handler.notationDecl(name, publicId, systemId)
    239 
    240     def unparsedEntityDecl(self, name, publicId, systemId, ndata):
    241         self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)
    242 
    243     # EntityResolver methods

    244 
    245     def resolveEntity(self, publicId, systemId):
    246         return self._ent_handler.resolveEntity(publicId, systemId)
    247 
    248     # XMLReader methods

    249 
    250     def parse(self, source):
    251         self._parent.setContentHandler(self)
    252         self._parent.setErrorHandler(self)
    253         self._parent.setEntityResolver(self)
    254         self._parent.setDTDHandler(self)
    255         self._parent.parse(source)
    256 
    257     def setLocale(self, locale):
    258         self._parent.setLocale(locale)
    259 
    260     def getFeature(self, name):
    261         return self._parent.getFeature(name)
    262 
    263     def setFeature(self, name, state):
    264         self._parent.setFeature(name, state)
    265 
    266     def getProperty(self, name):
    267         return self._parent.getProperty(name)
    268 
    269     def setProperty(self, name, value):
    270         self._parent.setProperty(name, value)
    271 
    272     # XMLFilter methods

    273 
    274     def getParent(self):
    275         return self._parent
    276 
    277     def setParent(self, parent):
    278         self._parent = parent
    279 
    280 # --- Utility functions

    281 
    282 def prepare_input_source(source, base = ""):
    283     """This function takes an InputSource and an optional base URL and
    284     returns a fully resolved InputSource object ready for reading."""
    285 
    286     if type(source) in _StringTypes:
    287         source = xmlreader.InputSource(source)
    288     elif hasattr(source, "read"):
    289         f = source
    290         source = xmlreader.InputSource()
    291         source.setByteStream(f)
    292         if hasattr(f, "name"):
    293             source.setSystemId(f.name)
    294 
    295     if source.getByteStream() is None:
    296         sysid = source.getSystemId()
    297         basehead = os.path.dirname(os.path.normpath(base))
    298         sysidfilename = os.path.join(basehead, sysid)
    299         if os.path.isfile(sysidfilename):
    300             source.setSystemId(sysidfilename)
    301             f = open(sysidfilename, "rb")
    302         else:
    303             source.setSystemId(urlparse.urljoin(base, sysid))
    304             f = urllib.urlopen(source.getSystemId())
    305 
    306         source.setByteStream(f)
    307 
    308     return source
    309