"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""

import os
import urlparse
import urllib
import types

import handler
import xmlreader

try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    # Builds without unicode support do not define types.UnicodeType.
    _StringTypes = [types.StringType]

# See whether the xmlcharrefreplace error handler is
# supported
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    del xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"


def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of `d` found in `s` is replaced by its value.  The
    replacements are applied one after another in the dictionary's
    iteration order, so the output of one replacement may be rewritten
    by a later one.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s


def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # NOTE: the `entities` default is shared between calls but is only
    # ever read here, never mutated.

    # must do ampersand first, otherwise the ampersands introduced by
    # the other replacements would themselves be escaped
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data


def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that e.g. "&amp;lt;" becomes "&lt;"
    # rather than "<"
    return data.replace("&amp;", "&")


def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # Work on a copy so the caller's dictionary (or the shared default)
    # is never modified.
    entities = entities.copy()
    # Newline, carriage return and tab are written as character
    # references; these entries override caller-supplied mappings for
    # the same keys.
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote kinds occur: delimit with double quotes and
            # escape the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data


class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    Output goes to the file-like object `out` (sys.stdout when `out` is
    None).  Text that is not a `str` instance is encoded with `encoding`
    before being written.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that have
        # not yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write `text` to the output, encoding it unless it is a `str`."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the current mappings so endPrefixMapping
        # can restore them, then record the new binding.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        # Restore the most recently saved snapshot and drop it from the
        # stack.
        self._current_context = self._ns_contexts.pop()

    def startElement(self, name, attrs):
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._write('<' + self._qname(name))

        # Emit the namespace declarations collected since the previous
        # start tag.  They go through _write so that they are encoded
        # the same way as the rest of the output.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        self._write(content)

    def processingInstruction(self, target, data):
        self._write('<?%s %s?>' % (target, data))


class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    # NOTE(review): _cont_handler, _err_handler, _dtd_handler and
    # _ent_handler are not assigned in this class; presumably
    # xmlreader.XMLReader.__init__ sets them -- confirm.

    def __init__(self, parent=None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every kind of handler on the parent
        # reader, so all of its events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent

# --- Utility functions

def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    `source` may also be a string, which is used as the system
    identifier of a new InputSource, or an object with a `read`
    attribute, which becomes the byte stream of a new InputSource (its
    `name` attribute, when present, is used as the system identifier).

    If the resulting InputSource has no byte stream, the system
    identifier is resolved against `base`: when it names an existing
    local file that file is opened in binary mode, otherwise it is
    joined with `base` as a URL and opened with urllib.urlopen.  The
    opened stream is stored on the InputSource and left open.
    """
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source