Home | History | Annotate | Download | only in dom
      1 """Implementation of the DOM Level 3 'LS-Load' feature."""
      2 
      3 import copy
      4 import xml.dom
      5 
      6 from xml.dom.NodeFilter import NodeFilter
      7 
      8 
      9 __all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
     10 
     11 
     12 class Options:
     13     """Features object that has variables set for each DOMBuilder feature.
     14 
     15     The DOMBuilder class uses an instance of this class to pass settings to
     16     the ExpatBuilder class.
     17     """
     18 
     19     # Note that the DOMBuilder class in LoadSave constrains which of these
     20     # values can be set using the DOM Level 3 LoadSave feature.
     21 
     22     namespaces = 1
     23     namespace_declarations = True
     24     validation = False
     25     external_parameter_entities = True
     26     external_general_entities = True
     27     external_dtd_subset = True
     28     validate_if_schema = False
     29     validate = False
     30     datatype_normalization = False
     31     create_entity_ref_nodes = True
     32     entities = True
     33     whitespace_in_element_content = True
     34     cdata_sections = True
     35     comments = True
     36     charset_overrides_xml_encoding = True
     37     infoset = False
     38     supported_mediatypes_only = False
     39 
     40     errorHandler = None
     41     filter = None
     42 
     43 
     44 class DOMBuilder:
     45     entityResolver = None
     46     errorHandler = None
     47     filter = None
     48 
     49     ACTION_REPLACE = 1
     50     ACTION_APPEND_AS_CHILDREN = 2
     51     ACTION_INSERT_AFTER = 3
     52     ACTION_INSERT_BEFORE = 4
     53 
     54     _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
     55                       ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
     56 
     57     def __init__(self):
     58         self._options = Options()
     59 
     60     def _get_entityResolver(self):
     61         return self.entityResolver
     62     def _set_entityResolver(self, entityResolver):
     63         self.entityResolver = entityResolver
     64 
     65     def _get_errorHandler(self):
     66         return self.errorHandler
     67     def _set_errorHandler(self, errorHandler):
     68         self.errorHandler = errorHandler
     69 
     70     def _get_filter(self):
     71         return self.filter
     72     def _set_filter(self, filter):
     73         self.filter = filter
     74 
     75     def setFeature(self, name, state):
     76         if self.supportsFeature(name):
     77             state = state and 1 or 0
     78             try:
     79                 settings = self._settings[(_name_xform(name), state)]
     80             except KeyError:
     81                 raise xml.dom.NotSupportedErr(
     82                     "unsupported feature: %r" % (name,))
     83             else:
     84                 for name, value in settings:
     85                     setattr(self._options, name, value)
     86         else:
     87             raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
     88 
     89     def supportsFeature(self, name):
     90         return hasattr(self._options, _name_xform(name))
     91 
     92     def canSetFeature(self, name, state):
     93         key = (_name_xform(name), state and 1 or 0)
     94         return key in self._settings
     95 
     96     # This dictionary maps from (feature,value) to a list of
     97     # (option,value) pairs that should be set on the Options object.
     98     # If a (feature,value) setting is not in this dictionary, it is
     99     # not supported by the DOMBuilder.
    100     #
    101     _settings = {
    102         ("namespace_declarations", 0): [
    103             ("namespace_declarations", 0)],
    104         ("namespace_declarations", 1): [
    105             ("namespace_declarations", 1)],
    106         ("validation", 0): [
    107             ("validation", 0)],
    108         ("external_general_entities", 0): [
    109             ("external_general_entities", 0)],
    110         ("external_general_entities", 1): [
    111             ("external_general_entities", 1)],
    112         ("external_parameter_entities", 0): [
    113             ("external_parameter_entities", 0)],
    114         ("external_parameter_entities", 1): [
    115             ("external_parameter_entities", 1)],
    116         ("validate_if_schema", 0): [
    117             ("validate_if_schema", 0)],
    118         ("create_entity_ref_nodes", 0): [
    119             ("create_entity_ref_nodes", 0)],
    120         ("create_entity_ref_nodes", 1): [
    121             ("create_entity_ref_nodes", 1)],
    122         ("entities", 0): [
    123             ("create_entity_ref_nodes", 0),
    124             ("entities", 0)],
    125         ("entities", 1): [
    126             ("entities", 1)],
    127         ("whitespace_in_element_content", 0): [
    128             ("whitespace_in_element_content", 0)],
    129         ("whitespace_in_element_content", 1): [
    130             ("whitespace_in_element_content", 1)],
    131         ("cdata_sections", 0): [
    132             ("cdata_sections", 0)],
    133         ("cdata_sections", 1): [
    134             ("cdata_sections", 1)],
    135         ("comments", 0): [
    136             ("comments", 0)],
    137         ("comments", 1): [
    138             ("comments", 1)],
    139         ("charset_overrides_xml_encoding", 0): [
    140             ("charset_overrides_xml_encoding", 0)],
    141         ("charset_overrides_xml_encoding", 1): [
    142             ("charset_overrides_xml_encoding", 1)],
    143         ("infoset", 0): [],
    144         ("infoset", 1): [
    145             ("namespace_declarations", 0),
    146             ("validate_if_schema", 0),
    147             ("create_entity_ref_nodes", 0),
    148             ("entities", 0),
    149             ("cdata_sections", 0),
    150             ("datatype_normalization", 1),
    151             ("whitespace_in_element_content", 1),
    152             ("comments", 1),
    153             ("charset_overrides_xml_encoding", 1)],
    154         ("supported_mediatypes_only", 0): [
    155             ("supported_mediatypes_only", 0)],
    156         ("namespaces", 0): [
    157             ("namespaces", 0)],
    158         ("namespaces", 1): [
    159             ("namespaces", 1)],
    160     }
    161 
    162     def getFeature(self, name):
    163         xname = _name_xform(name)
    164         try:
    165             return getattr(self._options, xname)
    166         except AttributeError:
    167             if name == "infoset":
    168                 options = self._options
    169                 return (options.datatype_normalization
    170                         and options.whitespace_in_element_content
    171                         and options.comments
    172                         and options.charset_overrides_xml_encoding
    173                         and not (options.namespace_declarations
    174                                  or options.validate_if_schema
    175                                  or options.create_entity_ref_nodes
    176                                  or options.entities
    177                                  or options.cdata_sections))
    178             raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
    179 
    180     def parseURI(self, uri):
    181         if self.entityResolver:
    182             input = self.entityResolver.resolveEntity(None, uri)
    183         else:
    184             input = DOMEntityResolver().resolveEntity(None, uri)
    185         return self.parse(input)
    186 
    187     def parse(self, input):
    188         options = copy.copy(self._options)
    189         options.filter = self.filter
    190         options.errorHandler = self.errorHandler
    191         fp = input.byteStream
    192         if fp is None and options.systemId:
    193             import urllib2
    194             fp = urllib2.urlopen(input.systemId)
    195         return self._parse_bytestream(fp, options)
    196 
    197     def parseWithContext(self, input, cnode, action):
    198         if action not in self._legal_actions:
    199             raise ValueError("not a legal action")
    200         raise NotImplementedError("Haven't written this yet...")
    201 
    202     def _parse_bytestream(self, stream, options):
    203         import xml.dom.expatbuilder
    204         builder = xml.dom.expatbuilder.makeBuilder(options)
    205         return builder.parseFile(stream)
    206 
    207 
    208 def _name_xform(name):
    209     return name.lower().replace('-', '_')
    210 
    211 
    212 class DOMEntityResolver(object):
    213     __slots__ = '_opener',
    214 
    215     def resolveEntity(self, publicId, systemId):
    216         assert systemId is not None
    217         source = DOMInputSource()
    218         source.publicId = publicId
    219         source.systemId = systemId
    220         source.byteStream = self._get_opener().open(systemId)
    221 
    222         # determine the encoding if the transport provided it
    223         source.encoding = self._guess_media_encoding(source)
    224 
    225         # determine the base URI is we can
    226         import posixpath, urlparse
    227         parts = urlparse.urlparse(systemId)
    228         scheme, netloc, path, params, query, fragment = parts
    229         # XXX should we check the scheme here as well?
    230         if path and not path.endswith("/"):
    231             path = posixpath.dirname(path) + "/"
    232             parts = scheme, netloc, path, params, query, fragment
    233             source.baseURI = urlparse.urlunparse(parts)
    234 
    235         return source
    236 
    237     def _get_opener(self):
    238         try:
    239             return self._opener
    240         except AttributeError:
    241             self._opener = self._create_opener()
    242             return self._opener
    243 
    244     def _create_opener(self):
    245         import urllib2
    246         return urllib2.build_opener()
    247 
    248     def _guess_media_encoding(self, source):
    249         info = source.byteStream.info()
    250         if "Content-Type" in info:
    251             for param in info.getplist():
    252                 if param.startswith("charset="):
    253                     return param.split("=", 1)[1].lower()
    254 
    255 
    256 class DOMInputSource(object):
    257     __slots__ = ('byteStream', 'characterStream', 'stringData',
    258                  'encoding', 'publicId', 'systemId', 'baseURI')
    259 
    260     def __init__(self):
    261         self.byteStream = None
    262         self.characterStream = None
    263         self.stringData = None
    264         self.encoding = None
    265         self.publicId = None
    266         self.systemId = None
    267         self.baseURI = None
    268 
    269     def _get_byteStream(self):
    270         return self.byteStream
    271     def _set_byteStream(self, byteStream):
    272         self.byteStream = byteStream
    273 
    274     def _get_characterStream(self):
    275         return self.characterStream
    276     def _set_characterStream(self, characterStream):
    277         self.characterStream = characterStream
    278 
    279     def _get_stringData(self):
    280         return self.stringData
    281     def _set_stringData(self, data):
    282         self.stringData = data
    283 
    284     def _get_encoding(self):
    285         return self.encoding
    286     def _set_encoding(self, encoding):
    287         self.encoding = encoding
    288 
    289     def _get_publicId(self):
    290         return self.publicId
    291     def _set_publicId(self, publicId):
    292         self.publicId = publicId
    293 
    294     def _get_systemId(self):
    295         return self.systemId
    296     def _set_systemId(self, systemId):
    297         self.systemId = systemId
    298 
    299     def _get_baseURI(self):
    300         return self.baseURI
    301     def _set_baseURI(self, uri):
    302         self.baseURI = uri
    303 
    304 
    305 class DOMBuilderFilter:
    306     """Element filter which can be used to tailor construction of
    307     a DOM instance.
    308     """
    309 
    310     # There's really no need for this class; concrete implementations
    311     # should just implement the endElement() and startElement()
    312     # methods as appropriate.  Using this makes it easy to only
    313     # implement one of them.
    314 
    315     FILTER_ACCEPT = 1
    316     FILTER_REJECT = 2
    317     FILTER_SKIP = 3
    318     FILTER_INTERRUPT = 4
    319 
    320     whatToShow = NodeFilter.SHOW_ALL
    321 
    322     def _get_whatToShow(self):
    323         return self.whatToShow
    324 
    325     def acceptNode(self, element):
    326         return self.FILTER_ACCEPT
    327 
    328     def startContainer(self, element):
    329         return self.FILTER_ACCEPT
    330 
    331 del NodeFilter
    332 
    333 
    334 class DocumentLS:
    335     """Mixin to create documents that conform to the load/save spec."""
    336 
    337     async = False
    338 
    339     def _get_async(self):
    340         return False
    341     def _set_async(self, async):
    342         if async:
    343             raise xml.dom.NotSupportedErr(
    344                 "asynchronous document loading is not supported")
    345 
    346     def abort(self):
    347         # What does it mean to "clear" a document?  Does the
    348         # documentElement disappear?
    349         raise NotImplementedError(
    350             "haven't figured out what this means yet")
    351 
    352     def load(self, uri):
    353         raise NotImplementedError("haven't written this yet")
    354 
    355     def loadXML(self, source):
    356         raise NotImplementedError("haven't written this yet")
    357 
    358     def saveXML(self, snode):
    359         if snode is None:
    360             snode = self
    361         elif snode.ownerDocument is not self:
    362             raise xml.dom.WrongDocumentErr()
    363         return snode.toxml()
    364 
    365 
    366 class DOMImplementationLS:
    367     MODE_SYNCHRONOUS = 1
    368     MODE_ASYNCHRONOUS = 2
    369 
    370     def createDOMBuilder(self, mode, schemaType):
    371         if schemaType is not None:
    372             raise xml.dom.NotSupportedErr(
    373                 "schemaType not yet supported")
    374         if mode == self.MODE_SYNCHRONOUS:
    375             return DOMBuilder()
    376         if mode == self.MODE_ASYNCHRONOUS:
    377             raise xml.dom.NotSupportedErr(
    378                 "asynchronous builders are not supported")
    379         raise ValueError("unknown value for mode")
    380 
    381     def createDOMWriter(self):
    382         raise NotImplementedError(
    383             "the writer interface hasn't been written yet!")
    384 
    385     def createDOMInputSource(self):
    386         return DOMInputSource()
    387