Home | History | Annotate | Download | only in html5lib
      1 from __future__ import absolute_import, division, unicode_literals
      2 
      3 from types import ModuleType
      4 
      5 from six import text_type
      6 
      7 try:
      8     import xml.etree.cElementTree as default_etree
      9 except ImportError:
     10     import xml.etree.ElementTree as default_etree
     11 
     12 
     13 __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
     14            "surrogatePairToCodepoint", "moduleFactoryFactory",
     15            "supports_lone_surrogates"]
     16 
     17 
     18 # Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
     19 # caught by the below test. In general this would be any platform
     20 # using UTF-16 as its encoding of unicode strings, such as
     21 # Jython. This is because UTF-16 itself is based on the use of such
     22 # surrogates, and there is no mechanism to further escape such
     23 # escapes.
     24 try:
     25     _x = eval('"\\uD800"')
     26     if not isinstance(_x, text_type):
     27         # We need this with u"" because of http://bugs.jython.org/issue2039
     28         _x = eval('u"\\uD800"')
     29         assert isinstance(_x, text_type)
     30 except:
     31     supports_lone_surrogates = False
     32 else:
     33     supports_lone_surrogates = True
     34 
     35 
     36 class MethodDispatcher(dict):
     37     """Dict with 2 special properties:
     38 
     39     On initiation, keys that are lists, sets or tuples are converted to
     40     multiple keys so accessing any one of the items in the original
     41     list-like object returns the matching value
     42 
     43     md = MethodDispatcher({("foo", "bar"):"baz"})
     44     md["foo"] == "baz"
     45 
     46     A default value which can be set through the default attribute.
     47     """
     48 
     49     def __init__(self, items=()):
     50         # Using _dictEntries instead of directly assigning to self is about
     51         # twice as fast. Please do careful performance testing before changing
     52         # anything here.
     53         _dictEntries = []
     54         for name, value in items:
     55             if type(name) in (list, tuple, frozenset, set):
     56                 for item in name:
     57                     _dictEntries.append((item, value))
     58             else:
     59                 _dictEntries.append((name, value))
     60         dict.__init__(self, _dictEntries)
     61         self.default = None
     62 
     63     def __getitem__(self, key):
     64         return dict.get(self, key, self.default)
     65 
     66 
     67 # Some utility functions to deal with weirdness around UCS2 vs UCS4
     68 # python builds
     69 
     70 def isSurrogatePair(data):
     71     return (len(data) == 2 and
     72             ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
     73             ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
     74 
     75 
     76 def surrogatePairToCodepoint(data):
     77     char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
     78                 (ord(data[1]) - 0xDC00))
     79     return char_val
     80 
     81 # Module Factory Factory (no, this isn't Java, I know)
     82 # Here to stop this being duplicated all over the place.
     83 
     84 
     85 def moduleFactoryFactory(factory):
     86     moduleCache = {}
     87 
     88     def moduleFactory(baseModule, *args, **kwargs):
     89         if isinstance(ModuleType.__name__, type("")):
     90             name = "_%s_factory" % baseModule.__name__
     91         else:
     92             name = b"_%s_factory" % baseModule.__name__
     93 
     94         if name in moduleCache:
     95             return moduleCache[name]
     96         else:
     97             mod = ModuleType(name)
     98             objs = factory(baseModule, *args, **kwargs)
     99             mod.__dict__.update(objs)
    100             moduleCache[name] = mod
    101             return mod
    102 
    103     return moduleFactory
    104