"""Implementation of the DOM Level 3 'LS-Load' feature."""

import copy
import xml.dom

from xml.dom.NodeFilter import NodeFilter


__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]


def _import_urllib_request():
    """Return the module that provides urlopen() and build_opener().

    Imported lazily so that merely importing this module does not pull in
    the URL-handling machinery.
    """
    try:
        import urllib.request as request  # Python 3
    except ImportError:
        import urllib2 as request  # Python 2
    return request


def _import_urlparse():
    """Return the module that provides urlparse() and urlunparse()."""
    try:
        import urllib.parse as parse  # Python 3
    except ImportError:
        import urlparse as parse  # Python 2
    return parse


class Options:
    """Features object that has variables set for each DOMBuilder feature.

    The DOMBuilder class uses an instance of this class to pass settings to
    the ExpatBuilder class.
    """

    # Note that the DOMBuilder class in LoadSave constrains which of these
    # values can be set using the DOM Level 3 LoadSave feature.

    namespaces = 1
    namespace_declarations = True
    validation = False
    external_parameter_entities = True
    external_general_entities = True
    external_dtd_subset = True
    validate_if_schema = False
    validate = False
    datatype_normalization = False
    create_entity_ref_nodes = True
    entities = True
    whitespace_in_element_content = True
    cdata_sections = True
    comments = True
    charset_overrides_xml_encoding = True
    infoset = False
    supported_mediatypes_only = False

    errorHandler = None
    filter = None


class DOMBuilder:
    """Configurable front end that parses input sources into documents.

    Features are addressed by their DOM-style names (for example
    ``"cdata-sections"``); they are mapped onto attributes of an
    ``Options`` instance which is handed to ``xml.dom.expatbuilder`` when
    parsing.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    def _get_entityResolver(self):
        return self.entityResolver

    def _set_entityResolver(self, entityResolver):
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        return self.errorHandler

    def _set_errorHandler(self, errorHandler):
        self.errorHandler = errorHandler

    def _get_filter(self):
        return self.filter

    def _set_filter(self, filter):
        self.filter = filter

    def setFeature(self, name, state):
        """Turn the feature *name* on or off according to *state*.

        Raises xml.dom.NotFoundErr when the feature is unknown and
        xml.dom.NotSupportedErr when the feature is known but the
        requested state is not listed in ``_settings``.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        key = (_name_xform(name), 1 if state else 0)
        try:
            settings = self._settings[key]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,))
        # One feature may imply several option changes (see "infoset").
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if *name* corresponds to an attribute of Options."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if setFeature(name, state) would be accepted."""
        key = (_name_xform(name), 1 if state else 0)
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current state of the feature *name*.

        "infoset" is derived from the options it implies rather than
        stored, so it reflects the options as they are now.  Raises
        xml.dom.NotFoundErr for unknown features.
        """
        xname = _name_xform(name)
        if xname == "infoset":
            # Must be checked before the attribute lookup: Options always
            # has an ``infoset`` attribute, but setFeature() never updates
            # it, so reading it directly would return a stale value.
            options = self._options
            return bool(options.datatype_normalization
                        and options.whitespace_in_element_content
                        and options.comments
                        and options.charset_overrides_xml_encoding
                        and not (options.namespace_declarations
                                 or options.validate_if_schema
                                 or options.create_entity_ref_nodes
                                 or options.entities
                                 or options.cdata_sections))
        try:
            return getattr(self._options, xname)
        except AttributeError:
            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))

    def parseURI(self, uri):
        """Resolve *uri* to an input source and return the parsed document.

        Uses ``self.entityResolver`` when one is set, otherwise a fresh
        DOMEntityResolver.
        """
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
            return self.parse(input)
        input = DOMEntityResolver().resolveEntity(None, uri)
        try:
            return self.parse(input)
        finally:
            # The stream was opened by the resolver created here, so nobody
            # else holds a reference that could close it.
            stream = input.byteStream
            if stream is not None:
                stream.close()

    def parse(self, input):
        """Parse the document described by *input* and return it.

        *input* supplies either an open ``byteStream`` or a ``systemId``
        that is opened as a URL.  Raises ValueError when it has neither.
        """
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        if fp is not None:
            # Caller-provided stream: the caller remains responsible for
            # closing it.
            return self._parse_bytestream(fp, options)
        # The system identifier lives on the input source, not on the
        # options object.
        if not input.systemId:
            raise ValueError(
                "input source has neither a byteStream nor a systemId")
        fp = _import_urllib_request().urlopen(input.systemId)
        try:
            return self._parse_bytestream(fp, options)
        finally:
            fp.close()

    def parseWithContext(self, input, cnode, action):
        """Not implemented; only validates *action*."""
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        """Parse *stream* with an expat builder configured by *options*."""
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)


def _name_xform(name):
    """Map a DOM feature name to the matching Options attribute name."""
    return name.lower().replace('-', '_')


class DOMEntityResolver(object):
    """Resolver that opens system identifiers as URLs."""

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open *systemId* and return a populated DOMInputSource."""
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # determine the encoding if the transport provided it
        source.encoding = self._guess_media_encoding(source)

        # determine the base URI if we can
        import posixpath
        urlparse = _import_urlparse()
        parts = urlparse.urlparse(systemId)
        scheme, netloc, path, params, query, fragment = parts
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            path = posixpath.dirname(path) + "/"
            parts = scheme, netloc, path, params, query, fragment
            source.baseURI = urlparse.urlunparse(parts)

        return source

    def _get_opener(self):
        """Return the opener, creating and caching it on first use."""
        try:
            return self._opener
        except AttributeError:
            self._opener = self._create_opener()
            return self._opener

    def _create_opener(self):
        """Build the URL opener; subclasses may override this."""
        return _import_urllib_request().build_opener()

    def _guess_media_encoding(self, source):
        """Return the lower-cased charset of the stream's Content-Type.

        Returns None when the transport did not report a charset.
        """
        info = source.byteStream.info()
        if "Content-Type" not in info:
            return None
        if hasattr(info, "getplist"):
            # Python 2 header objects expose the raw parameter list.
            for param in info.getplist():
                if param.startswith("charset="):
                    return param.split("=", 1)[1].lower()
            return None
        if hasattr(info, "get_content_charset"):
            # Python 3 header objects are email messages.
            return info.get_content_charset()
        return None


class DOMInputSource(object):
    """Plain record describing where the data for a document comes from."""

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        self.byteStream = None
        self.characterStream = None
        self.stringData = None
        self.encoding = None
        self.publicId = None
        self.systemId = None
        self.baseURI = None

    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri


class DOMBuilderFilter:
    """Element filter which can be used to tailor construction of
    a DOM instance.
    """

    # There's really no need for this class; concrete implementations
    # should just implement the acceptNode() and startContainer()
    # methods as appropriate.  Using this makes it easy to only
    # implement one of them.

    FILTER_ACCEPT = 1
    FILTER_REJECT = 2
    FILTER_SKIP = 3
    FILTER_INTERRUPT = 4

    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        return self.whatToShow

    def acceptNode(self, element):
        return self.FILTER_ACCEPT

    def startContainer(self, element):
        return self.FILTER_ACCEPT


del NodeFilter


class DocumentLS:
    """Mixin to create documents that conform to the load/save spec."""

    # ``async`` is a reserved word from Python 3.7 on, so the attribute is
    # spelled with a trailing underscore in the class body.
    async_ = False

    def _get_async(self):
        return False

    def _set_async(self, flag):
        if flag:
            raise xml.dom.NotSupportedErr(
                "asynchronous document loading is not supported")

    def abort(self):
        # What does it mean to "clear" a document?  Does the
        # documentElement disappear?
        raise NotImplementedError(
            "haven't figured out what this means yet")

    def load(self, uri):
        raise NotImplementedError("haven't written this yet")

    def loadXML(self, source):
        raise NotImplementedError("haven't written this yet")

    def saveXML(self, snode):
        """Serialize *snode*, or this document when *snode* is None."""
        if snode is None:
            snode = self
        elif snode.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr()
        return snode.toxml()


# Keep the historical attribute name reachable (via getattr on interpreters
# where ``async`` is a keyword, via plain attribute access elsewhere).
setattr(DocumentLS, "async", DocumentLS.async_)


class DOMImplementationLS:
    """Factory mixin for the load/save objects defined in this module."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a new DOMBuilder for synchronous *mode*.

        Raises xml.dom.NotSupportedErr for a non-None *schemaType* or for
        asynchronous mode, and ValueError for any other *mode*.
        """
        if schemaType is not None:
            raise xml.dom.NotSupportedErr(
                "schemaType not yet supported")
        if mode == self.MODE_SYNCHRONOUS:
            return DOMBuilder()
        if mode == self.MODE_ASYNCHRONOUS:
            raise xml.dom.NotSupportedErr(
                "asynchronous builders are not supported")
        raise ValueError("unknown value for mode")

    def createDOMWriter(self):
        raise NotImplementedError(
            "the writer interface hasn't been written yet!")

    def createDOMInputSource(self):
        return DOMInputSource()