"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""

import os, urlparse, urllib, types
import io
import sys
import handler
import xmlreader

# String types accepted as a system identifier.  types.UnicodeType is
# looked up defensively; if the attribute is missing only the byte-string
# type is used.
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    _StringTypes = [types.StringType]


def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of `d` found in `s` is replaced by its value, one key at a
    time in the dictionary's iteration order.  Returns the new string.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s


def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    escaped string.
    """

    # must do ampersand first, otherwise the "&" introduced by the other
    # replacements would be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data


def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    unescaped string.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, otherwise "&amp;lt;" would wrongly end up
    # as "<" instead of "&lt;"
    return data.replace("&amp;", "&")


def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the value including its surrounding quote characters.
    """
    # Work on a copy so neither the caller's dictionary nor the shared
    # default argument is modified.
    entities = entities.copy()
    # Newline, carriage return and tab are written as character
    # references instead of literal whitespace.
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes and escape
            # the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data


def _gettextwriter(out, encoding):
    """Wrap `out` in a text writer that encodes to `encoding`.

    `out` may be None (sys.stdout is used), an io.RawIOBase instance, or
    any object that has a write() method.  The returned io.TextIOWrapper
    subclass uses the 'xmlcharrefreplace' error handler, writes newlines
    as '\\n', and flushes after every write.
    """
    if out is None:
        out = sys.stdout

    if isinstance(out, io.RawIOBase):
        buffer = io.BufferedIOBase(out)
        # Keep the original file open when the TextIOWrapper is
        # destroyed
        buffer.close = lambda: None
    else:
        # This is to handle passed objects that aren't in the
        # IOBase hierarchy, but just have a write method
        buffer = io.BufferedIOBase()
        buffer.writable = lambda: True
        buffer.write = out.write
        try:
            # TextIOWrapper uses these methods to determine
            # if BOM (for UTF-16, etc) should be added
            buffer.seekable = out.seekable
            buffer.tell = out.tell
        except AttributeError:
            pass

    # wrap a binary writer with TextIOWrapper
    class UnbufferedTextIOWrapper(io.TextIOWrapper):
        def write(self, s):
            super(UnbufferedTextIOWrapper, self).write(s)
            self.flush()

    return UnbufferedTextIOWrapper(buffer, encoding=encoding,
                                   errors='xmlcharrefreplace',
                                   newline='\n')


class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    Output goes to `out` (sys.stdout when None) through the writer built
    by _gettextwriter(), using `encoding`; the same encoding name is put
    into the XML declaration written by startDocument().
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        handler.ContentHandler.__init__(self)
        out = _gettextwriter(out, encoding)
        self._write = out.write
        self._flush = out.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not been written as xmlns attributes yet
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair.

        Raises KeyError if the namespace URI is not in the current
        context (and is not the predefined 'xml' namespace).
        """
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write(u'<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def endDocument(self):
        """Flush the underlying writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Bind `uri` to `prefix` and queue the declaration for output."""
        # Save a snapshot of the context as it was before this mapping so
        # that endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._write(u'<' + name)
        for (attr_name, value) in attrs.items():
            self._write(u' %s=%s' % (attr_name, quoteattr(value)))
        self._write(u'>')

    def endElement(self, name):
        """Write the end tag for `name`."""
        self._write(u'</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        `name` is a (uri, localname) pair; the `qname` argument is not
        used, the qualified name is rebuilt with _qname().  Pending
        prefix mappings are written as xmlns attributes on this element.
        """
        self._write(u'<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(u' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(u' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(u' %s=%s' % (self._qname(attr_name),
                                     quoteattr(value)))
        self._write(u'>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (uri, localname) pair `name`."""
        self._write(u'</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(unicode(content)))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace without escaping."""
        self._write(unicode(content))

    def processingInstruction(self, target, data):
        """Write a processing instruction; `data` is written unescaped."""
        self._write(u'<?%s %s?>' % (target, data))


class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent=None):
        xmlreader.XMLReader.__init__(self)
        # The reader that parse() and the configuration requests are
        # forwarded to; may be set later through setParent().
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent so that all
        # events pass through the methods above before reaching the
        # handlers installed on the filter itself.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent


# --- Utility functions

def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    `source` may be a string (used as the system identifier), an object
    with a read() method (used as the byte stream; its `name` attribute,
    if present, becomes the system identifier), or an InputSource.

    When the source has no byte stream yet, the system identifier is
    joined onto the directory part of `base`: if that names an existing
    file it is opened in binary mode, otherwise the identifier is
    resolved against `base` as a URL and opened with urllib.urlopen().
    """

    # isinstance() rather than an exact type() match, so that subclasses
    # of the string types are treated as system identifiers too.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        try:
            sysid = source.getSystemId()
            basehead = os.path.dirname(os.path.normpath(base))
            encoding = sys.getfilesystemencoding()
            # Bring sysid and basehead to the same string type before
            # joining them: prefer decoding the byte string, and fall
            # back to encoding the unicode one if decoding fails.
            if isinstance(sysid, unicode):
                if not isinstance(basehead, unicode):
                    try:
                        basehead = basehead.decode(encoding)
                    except UnicodeDecodeError:
                        sysid = sysid.encode(encoding)
            else:
                if isinstance(basehead, unicode):
                    try:
                        sysid = sysid.decode(encoding)
                    except UnicodeDecodeError:
                        basehead = basehead.encode(encoding)
            sysidfilename = os.path.join(basehead, sysid)
            isfile = os.path.isfile(sysidfilename)
        except UnicodeError:
            # The name could not be converted; treat it as "not a local
            # file" and fall through to URL handling.
            isfile = False
        if isfile:
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source