Home | History | Annotate | Download | only in Lib
      1 r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
      2 
      3 The property list (.plist) file format is a simple XML pickle supporting
      4 basic object types, like dictionaries, lists, numbers and strings.
      5 Usually the top level object is a dictionary.
      6 
      7 To write out a plist file, use the dump(value, file)
      8 function. 'value' is the top level object, 'file' is
      9 a (writable) file object.
     10 
     11 To parse a plist from a file, use the load(file) function,
     12 with a (readable) file object as the only argument. It
     13 returns the top level object (again, usually a dictionary).
     14 
     15 To work with plist data in bytes objects, you can use loads()
     16 and dumps().
     17 
     18 Values can be strings, integers, floats, booleans, tuples, lists,
     19 dictionaries (but only with string keys), Data, bytes, bytearray, or
     20 datetime.datetime objects.
     21 
     22 Generate Plist example:
     23 
     24     pl = dict(
     25         aString = "Doodah",
     26         aList = ["A", "B", 12, 32.1, [1, 2, 3]],
     27         aFloat = 0.1,
     28         anInt = 728,
     29         aDict = dict(
     30             anotherString = "<hello & hi there!>",
     31             aUnicodeValue = "M\xe4ssig, Ma\xdf",
     32             aTrueValue = True,
     33             aFalseValue = False,
     34         ),
     35         someData = b"<binary gunk>",
     36         someMoreData = b"<lots of binary gunk>" * 10,
     37         aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
     38     )
     39     with open(fileName, 'wb') as fp:
     40         dump(pl, fp)
     41 
     42 Parse Plist example:
     43 
     44     with open(fileName, 'rb') as fp:
     45         pl = load(fp)
     46     print(pl["aKey"])
     47 """
# Names exported by ``from plistlib import *``.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Plist", "Data", "Dict", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps"
]
     53 
     54 import binascii
     55 import codecs
     56 import contextlib
     57 import datetime
     58 import enum
     59 from io import BytesIO
     60 import itertools
     61 import os
     62 import re
     63 import struct
     64 from warnings import warn
     65 from xml.parsers.expat import ParserCreate
     66 
     67 
# Enumeration with the two members FMT_XML and FMT_BINARY.  The members are
# also copied into the module namespace so they can be referenced directly
# as FMT_XML / FMT_BINARY.
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
     70 
     71 
     72 #
     73 #
     74 # Deprecated functionality
     75 #
     76 #
     77 
     78 
     79 class _InternalDict(dict):
     80 
     81     # This class is needed while Dict is scheduled for deprecation:
     82     # we only need to warn when a *user* instantiates Dict or when
     83     # the "attribute notation for dict keys" is used.
     84     __slots__ = ()
     85 
     86     def __getattr__(self, attr):
     87         try:
     88             value = self[attr]
     89         except KeyError:
     90             raise AttributeError(attr)
     91         warn("Attribute access from plist dicts is deprecated, use d[key] "
     92              "notation instead", DeprecationWarning, 2)
     93         return value
     94 
     95     def __setattr__(self, attr, value):
     96         warn("Attribute access from plist dicts is deprecated, use d[key] "
     97              "notation instead", DeprecationWarning, 2)
     98         self[attr] = value
     99 
    100     def __delattr__(self, attr):
    101         try:
    102             del self[attr]
    103         except KeyError:
    104             raise AttributeError(attr)
    105         warn("Attribute access from plist dicts is deprecated, use d[key] "
    106              "notation instead", DeprecationWarning, 2)
    107 
    108 
    109 class Dict(_InternalDict):
    110 
    111     def __init__(self, **kwargs):
    112         warn("The plistlib.Dict class is deprecated, use builtin dict instead",
    113              DeprecationWarning, 2)
    114         super().__init__(**kwargs)
    115 
    116 
    117 @contextlib.contextmanager
    118 def _maybe_open(pathOrFile, mode):
    119     if isinstance(pathOrFile, str):
    120         with open(pathOrFile, mode) as fp:
    121             yield fp
    122 
    123     else:
    124         yield pathOrFile
    125 
    126 
    127 class Plist(_InternalDict):
    128     """This class has been deprecated. Use dump() and load()
    129     functions instead, together with regular dict objects.
    130     """
    131 
    132     def __init__(self, **kwargs):
    133         warn("The Plist class is deprecated, use the load() and "
    134              "dump() functions instead", DeprecationWarning, 2)
    135         super().__init__(**kwargs)
    136 
    137     @classmethod
    138     def fromFile(cls, pathOrFile):
    139         """Deprecated. Use the load() function instead."""
    140         with _maybe_open(pathOrFile, 'rb') as fp:
    141             value = load(fp)
    142         plist = cls()
    143         plist.update(value)
    144         return plist
    145 
    146     def write(self, pathOrFile):
    147         """Deprecated. Use the dump() function instead."""
    148         with _maybe_open(pathOrFile, 'wb') as fp:
    149             dump(self, fp)
    150 
    151 
    152 def readPlist(pathOrFile):
    153     """
    154     Read a .plist from a path or file. pathOrFile should either
    155     be a file name, or a readable binary file object.
    156 
    157     This function is deprecated, use load instead.
    158     """
    159     warn("The readPlist function is deprecated, use load() instead",
    160         DeprecationWarning, 2)
    161 
    162     with _maybe_open(pathOrFile, 'rb') as fp:
    163         return load(fp, fmt=None, use_builtin_types=False,
    164             dict_type=_InternalDict)
    165 
    166 def writePlist(value, pathOrFile):
    167     """
    168     Write 'value' to a .plist file. 'pathOrFile' may either be a
    169     file name or a (writable) file object.
    170 
    171     This function is deprecated, use dump instead.
    172     """
    173     warn("The writePlist function is deprecated, use dump() instead",
    174         DeprecationWarning, 2)
    175     with _maybe_open(pathOrFile, 'wb') as fp:
    176         dump(value, fp, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    177 
    178 
    179 def readPlistFromBytes(data):
    180     """
    181     Read a plist data from a bytes object. Return the root object.
    182 
    183     This function is deprecated, use loads instead.
    184     """
    185     warn("The readPlistFromBytes function is deprecated, use loads() instead",
    186         DeprecationWarning, 2)
    187     return load(BytesIO(data), fmt=None, use_builtin_types=False,
    188         dict_type=_InternalDict)
    189 
    190 
    191 def writePlistToBytes(value):
    192     """
    193     Return 'value' as a plist-formatted bytes object.
    194 
    195     This function is deprecated, use dumps instead.
    196     """
    197     warn("The writePlistToBytes function is deprecated, use dumps() instead",
    198         DeprecationWarning, 2)
    199     f = BytesIO()
    200     dump(value, f, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    201     return f.getvalue()
    202 
    203 
    204 class Data:
    205     """
    206     Wrapper for binary data.
    207 
    208     This class is deprecated, use a bytes object instead.
    209     """
    210 
    211     def __init__(self, data):
    212         if not isinstance(data, bytes):
    213             raise TypeError("data must be as bytes")
    214         self.data = data
    215 
    216     @classmethod
    217     def fromBase64(cls, data):
    218         # base64.decodebytes just calls binascii.a2b_base64;
    219         # it seems overkill to use both base64 and binascii.
    220         return cls(_decode_base64(data))
    221 
    222     def asBase64(self, maxlinelength=76):
    223         return _encode_base64(self.data, maxlinelength)
    224 
    225     def __eq__(self, other):
    226         if isinstance(other, self.__class__):
    227             return self.data == other.data
    228         elif isinstance(other, bytes):
    229             return self.data == other
    230         else:
    231             return NotImplemented
    232 
    233     def __repr__(self):
    234         return "%s(%s)" % (self.__class__.__name__, repr(self.data))
    235 
    236 #
    237 #
    238 # End of deprecated functionality
    239 #
    240 #
    241 
    242 
    243 #
    244 # XML support
    245 #
    246 
    247 
# XML 'header': the XML declaration plus the Apple plist DOCTYPE.
# _PlistWriter writes these bytes to the file first when its writeHeader
# argument is true.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
    253 
    254 
# Regex matching any control character in the range \x00-\x1f except
# tab (\x09), line feed (\x0a) and carriage return (\x0d).  _escape()
# rejects any string in which this pattern is found.
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
    259 
    260 def _encode_base64(s, maxlinelength=76):
    261     # copied from base64.encodebytes(), with added maxlinelength argument
    262     maxbinsize = (maxlinelength//4)*3
    263     pieces = []
    264     for i in range(0, len(s), maxbinsize):
    265         chunk = s[i : i + maxbinsize]
    266         pieces.append(binascii.b2a_base64(chunk))
    267     return b''.join(pieces)
    268 
    269 def _decode_base64(s):
    270     if isinstance(s, str):
    271         return binascii.a2b_base64(s.encode("utf-8"))
    272 
    273     else:
    274         return binascii.a2b_base64(s)
    275 
# Contents should conform to a subset of ISO 8601
# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z').  Smaller
# units may be omitted, with a loss of precision.  Only the year and the
# trailing 'Z' are mandatory in the pattern; each finer unit is an optional
# group nested inside the previous one.
_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
    280 
    281 
    282 def _date_from_string(s):
    283     order = ('year', 'month', 'day', 'hour', 'minute', 'second')
    284     gd = _dateParser.match(s).groupdict()
    285     lst = []
    286     for key in order:
    287         val = gd[key]
    288         if val is None:
    289             break
    290         lst.append(int(val))
    291     return datetime.datetime(*lst)
    292 
    293 
    294 def _date_to_string(d):
    295     return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
    296         d.year, d.month, d.day,
    297         d.hour, d.minute, d.second
    298     )
    299 
    300 def _escape(text):
    301     m = _controlCharPat.search(text)
    302     if m is not None:
    303         raise ValueError("strings can't contains control characters; "
    304                          "use bytes instead")
    305     text = text.replace("\r\n", "\n")       # convert DOS line endings
    306     text = text.replace("\r", "\n")         # convert Mac line endings
    307     text = text.replace("&", "&amp;")       # escape '&'
    308     text = text.replace("<", "&lt;")        # escape '<'
    309     text = text.replace(">", "&gt;")        # escape '>'
    310     return text
    311 
    312 class _PlistParser:
    313     def __init__(self, use_builtin_types, dict_type):
    314         self.stack = []
    315         self.current_key = None
    316         self.root = None
    317         self._use_builtin_types = use_builtin_types
    318         self._dict_type = dict_type
    319 
    320     def parse(self, fileobj):
    321         self.parser = ParserCreate()
    322         self.parser.StartElementHandler = self.handle_begin_element
    323         self.parser.EndElementHandler = self.handle_end_element
    324         self.parser.CharacterDataHandler = self.handle_data
    325         self.parser.ParseFile(fileobj)
    326         return self.root
    327 
    328     def handle_begin_element(self, element, attrs):
    329         self.data = []
    330         handler = getattr(self, "begin_" + element, None)
    331         if handler is not None:
    332             handler(attrs)
    333 
    334     def handle_end_element(self, element):
    335         handler = getattr(self, "end_" + element, None)
    336         if handler is not None:
    337             handler()
    338 
    339     def handle_data(self, data):
    340         self.data.append(data)
    341 
    342     def add_object(self, value):
    343         if self.current_key is not None:
    344             if not isinstance(self.stack[-1], type({})):
    345                 raise ValueError("unexpected element at line %d" %
    346                                  self.parser.CurrentLineNumber)
    347             self.stack[-1][self.current_key] = value
    348             self.current_key = None
    349         elif not self.stack:
    350             # this is the root object
    351             self.root = value
    352         else:
    353             if not isinstance(self.stack[-1], type([])):
    354                 raise ValueError("unexpected element at line %d" %
    355                                  self.parser.CurrentLineNumber)
    356             self.stack[-1].append(value)
    357 
    358     def get_data(self):
    359         data = ''.join(self.data)
    360         self.data = []
    361         return data
    362 
    363     # element handlers
    364 
    365     def begin_dict(self, attrs):
    366         d = self._dict_type()
    367         self.add_object(d)
    368         self.stack.append(d)
    369 
    370     def end_dict(self):
    371         if self.current_key:
    372             raise ValueError("missing value for key '%s' at line %d" %
    373                              (self.current_key,self.parser.CurrentLineNumber))
    374         self.stack.pop()
    375 
    376     def end_key(self):
    377         if self.current_key or not isinstance(self.stack[-1], type({})):
    378             raise ValueError("unexpected key at line %d" %
    379                              self.parser.CurrentLineNumber)
    380         self.current_key = self.get_data()
    381 
    382     def begin_array(self, attrs):
    383         a = []
    384         self.add_object(a)
    385         self.stack.append(a)
    386 
    387     def end_array(self):
    388         self.stack.pop()
    389 
    390     def end_true(self):
    391         self.add_object(True)
    392 
    393     def end_false(self):
    394         self.add_object(False)
    395 
    396     def end_integer(self):
    397         self.add_object(int(self.get_data()))
    398 
    399     def end_real(self):
    400         self.add_object(float(self.get_data()))
    401 
    402     def end_string(self):
    403         self.add_object(self.get_data())
    404 
    405     def end_data(self):
    406         if self._use_builtin_types:
    407             self.add_object(_decode_base64(self.get_data()))
    408 
    409         else:
    410             self.add_object(Data.fromBase64(self.get_data()))
    411 
    412     def end_date(self):
    413         self.add_object(_date_from_string(self.get_data()))
    414 
    415 
    416 class _DumbXMLWriter:
    417     def __init__(self, file, indent_level=0, indent="\t"):
    418         self.file = file
    419         self.stack = []
    420         self._indent_level = indent_level
    421         self.indent = indent
    422 
    423     def begin_element(self, element):
    424         self.stack.append(element)
    425         self.writeln("<%s>" % element)
    426         self._indent_level += 1
    427 
    428     def end_element(self, element):
    429         assert self._indent_level > 0
    430         assert self.stack.pop() == element
    431         self._indent_level -= 1
    432         self.writeln("</%s>" % element)
    433 
    434     def simple_element(self, element, value=None):
    435         if value is not None:
    436             value = _escape(value)
    437             self.writeln("<%s>%s</%s>" % (element, value, element))
    438 
    439         else:
    440             self.writeln("<%s/>" % element)
    441 
    442     def writeln(self, line):
    443         if line:
    444             # plist has fixed encoding of utf-8
    445 
    446             # XXX: is this test needed?
    447             if isinstance(line, str):
    448                 line = line.encode('utf-8')
    449             self.file.write(self._indent_level * self.indent)
    450             self.file.write(line)
    451         self.file.write(b'\n')
    452 
    453 
    454 class _PlistWriter(_DumbXMLWriter):
    455     def __init__(
    456             self, file, indent_level=0, indent=b"\t", writeHeader=1,
    457             sort_keys=True, skipkeys=False):
    458 
    459         if writeHeader:
    460             file.write(PLISTHEADER)
    461         _DumbXMLWriter.__init__(self, file, indent_level, indent)
    462         self._sort_keys = sort_keys
    463         self._skipkeys = skipkeys
    464 
    465     def write(self, value):
    466         self.writeln("<plist version=\"1.0\">")
    467         self.write_value(value)
    468         self.writeln("</plist>")
    469 
    470     def write_value(self, value):
    471         if isinstance(value, str):
    472             self.simple_element("string", value)
    473 
    474         elif value is True:
    475             self.simple_element("true")
    476 
    477         elif value is False:
    478             self.simple_element("false")
    479 
    480         elif isinstance(value, int):
    481             if -1 << 63 <= value < 1 << 64:
    482                 self.simple_element("integer", "%d" % value)
    483             else:
    484                 raise OverflowError(value)
    485 
    486         elif isinstance(value, float):
    487             self.simple_element("real", repr(value))
    488 
    489         elif isinstance(value, dict):
    490             self.write_dict(value)
    491 
    492         elif isinstance(value, Data):
    493             self.write_data(value)
    494 
    495         elif isinstance(value, (bytes, bytearray)):
    496             self.write_bytes(value)
    497 
    498         elif isinstance(value, datetime.datetime):
    499             self.simple_element("date", _date_to_string(value))
    500 
    501         elif isinstance(value, (tuple, list)):
    502             self.write_array(value)
    503 
    504         else:
    505             raise TypeError("unsupported type: %s" % type(value))
    506 
    507     def write_data(self, data):
    508         self.write_bytes(data.data)
    509 
    510     def write_bytes(self, data):
    511         self.begin_element("data")
    512         self._indent_level -= 1
    513         maxlinelength = max(
    514             16,
    515             76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))
    516 
    517         for line in _encode_base64(data, maxlinelength).split(b"\n"):
    518             if line:
    519                 self.writeln(line)
    520         self._indent_level += 1
    521         self.end_element("data")
    522 
    523     def write_dict(self, d):
    524         if d:
    525             self.begin_element("dict")
    526             if self._sort_keys:
    527                 items = sorted(d.items())
    528             else:
    529                 items = d.items()
    530 
    531             for key, value in items:
    532                 if not isinstance(key, str):
    533                     if self._skipkeys:
    534                         continue
    535                     raise TypeError("keys must be strings")
    536                 self.simple_element("key", key)
    537                 self.write_value(value)
    538             self.end_element("dict")
    539 
    540         else:
    541             self.simple_element("dict")
    542 
    543     def write_array(self, array):
    544         if array:
    545             self.begin_element("array")
    546             for value in array:
    547                 self.write_value(value)
    548             self.end_element("array")
    549 
    550         else:
    551             self.simple_element("array")
    552 
    553 
    554 def _is_fmt_xml(header):
    555     prefixes = (b'<?xml', b'<plist')
    556 
    557     for pfx in prefixes:
    558         if header.startswith(pfx):
    559             return True
    560 
    561     # Also check for alternative XML encodings, this is slightly
    562     # overkill because the Apple tools (and plistlib) will not
    563     # generate files with these encodings.
    564     for bom, encoding in (
    565                 (codecs.BOM_UTF8, "utf-8"),
    566                 (codecs.BOM_UTF16_BE, "utf-16-be"),
    567                 (codecs.BOM_UTF16_LE, "utf-16-le"),
    568                 # expat does not support utf-32
    569                 #(codecs.BOM_UTF32_BE, "utf-32-be"),
    570                 #(codecs.BOM_UTF32_LE, "utf-32-le"),
    571             ):
    572         if not header.startswith(bom):
    573             continue
    574 
    575         for start in prefixes:
    576             prefix = bom + start.decode('ascii').encode(encoding)
    577             if header[:len(prefix)] == prefix:
    578                 return True
    579 
    580     return False
    581 
    582 #
    583 # Binary Plist
    584 #
    585 
    586 
    587 class InvalidFileException (ValueError):
    588     def __init__(self, message="Invalid file"):
    589         ValueError.__init__(self, message)
    590 
# Maps an integer width in bytes to the struct format character of the
# unsigned integer of that width; always used with the '>' (big-endian,
# standard size) prefix in this module.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
    592 
    593 class _BinaryPlistParser:
    594     """
    595     Read or write a binary plist file, following the description of the binary
    596     format.  Raise InvalidFileException in case of error, otherwise return the
    597     root object.
    598 
    599     see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    600     """
    601     def __init__(self, use_builtin_types, dict_type):
    602         self._use_builtin_types = use_builtin_types
    603         self._dict_type = dict_type
    604 
    605     def parse(self, fp):
    606         try:
    607             # The basic file format:
    608             # HEADER
    609             # object...
    610             # refid->offset...
    611             # TRAILER
    612             self._fp = fp
    613             self._fp.seek(-32, os.SEEK_END)
    614             trailer = self._fp.read(32)
    615             if len(trailer) != 32:
    616                 raise InvalidFileException()
    617             (
    618                 offset_size, self._ref_size, num_objects, top_object,
    619                 offset_table_offset
    620             ) = struct.unpack('>6xBBQQQ', trailer)
    621             self._fp.seek(offset_table_offset)
    622             self._object_offsets = self._read_ints(num_objects, offset_size)
    623             return self._read_object(self._object_offsets[top_object])
    624 
    625         except (OSError, IndexError, struct.error):
    626             raise InvalidFileException()
    627 
    628     def _get_size(self, tokenL):
    629         """ return the size of the next object."""
    630         if tokenL == 0xF:
    631             m = self._fp.read(1)[0] & 0x3
    632             s = 1 << m
    633             f = '>' + _BINARY_FORMAT[s]
    634             return struct.unpack(f, self._fp.read(s))[0]
    635 
    636         return tokenL
    637 
    638     def _read_ints(self, n, size):
    639         data = self._fp.read(size * n)
    640         if size in _BINARY_FORMAT:
    641             return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
    642         else:
    643             return tuple(int.from_bytes(data[i: i + size], 'big')
    644                          for i in range(0, size * n, size))
    645 
    646     def _read_refs(self, n):
    647         return self._read_ints(n, self._ref_size)
    648 
    649     def _read_object(self, offset):
    650         """
    651         read the object at offset.
    652 
    653         May recursively read sub-objects (content of an array/dict/set)
    654         """
    655         self._fp.seek(offset)
    656         token = self._fp.read(1)[0]
    657         tokenH, tokenL = token & 0xF0, token & 0x0F
    658 
    659         if token == 0x00:
    660             return None
    661 
    662         elif token == 0x08:
    663             return False
    664 
    665         elif token == 0x09:
    666             return True
    667 
    668         # The referenced source code also mentions URL (0x0c, 0x0d) and
    669         # UUID (0x0e), but neither can be generated using the Cocoa libraries.
    670 
    671         elif token == 0x0f:
    672             return b''
    673 
    674         elif tokenH == 0x10:  # int
    675             return int.from_bytes(self._fp.read(1 << tokenL),
    676                                   'big', signed=tokenL >= 3)
    677 
    678         elif token == 0x22: # real
    679             return struct.unpack('>f', self._fp.read(4))[0]
    680 
    681         elif token == 0x23: # real
    682             return struct.unpack('>d', self._fp.read(8))[0]
    683 
    684         elif token == 0x33:  # date
    685             f = struct.unpack('>d', self._fp.read(8))[0]
    686             # timestamp 0 of binary plists corresponds to 1/1/2001
    687             # (year of Mac OS X 10.0), instead of 1/1/1970.
    688             return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f)
    689 
    690         elif tokenH == 0x40:  # data
    691             s = self._get_size(tokenL)
    692             if self._use_builtin_types:
    693                 return self._fp.read(s)
    694             else:
    695                 return Data(self._fp.read(s))
    696 
    697         elif tokenH == 0x50:  # ascii string
    698             s = self._get_size(tokenL)
    699             result =  self._fp.read(s).decode('ascii')
    700             return result
    701 
    702         elif tokenH == 0x60:  # unicode string
    703             s = self._get_size(tokenL)
    704             return self._fp.read(s * 2).decode('utf-16be')
    705 
    706         # tokenH == 0x80 is documented as 'UID' and appears to be used for
    707         # keyed-archiving, not in plists.
    708 
    709         elif tokenH == 0xA0:  # array
    710             s = self._get_size(tokenL)
    711             obj_refs = self._read_refs(s)
    712             return [self._read_object(self._object_offsets[x])
    713                 for x in obj_refs]
    714 
    715         # tokenH == 0xB0 is documented as 'ordset', but is not actually
    716         # implemented in the Apple reference code.
    717 
    718         # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
    719         # plists.
    720 
    721         elif tokenH == 0xD0:  # dict
    722             s = self._get_size(tokenL)
    723             key_refs = self._read_refs(s)
    724             obj_refs = self._read_refs(s)
    725             result = self._dict_type()
    726             for k, o in zip(key_refs, obj_refs):
    727                 result[self._read_object(self._object_offsets[k])
    728                     ] = self._read_object(self._object_offsets[o])
    729             return result
    730 
    731         raise InvalidFileException()
    732 
    733 def _count_to_size(count):
    734     if count < 1 << 8:
    735         return 1
    736 
    737     elif count < 1 << 16:
    738         return 2
    739 
    740     elif count << 1 << 32:
    741         return 4
    742 
    743     else:
    744         return 8
    745 
    746 class _BinaryPlistWriter (object):
    747     def __init__(self, fp, sort_keys, skipkeys):
    748         self._fp = fp
    749         self._sort_keys = sort_keys
    750         self._skipkeys = skipkeys
    751 
    def write(self, value):
        """Serialise *value* to the file as a complete binary plist.

        Layout written: the ``bplist00`` header, every object, the table of
        object offsets, and finally the trailer.
        """
        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        object_count = len(self._objlist)
        self._object_offsets = [0] * object_count
        self._ref_size = _count_to_size(object_count)
        self._ref_format = _BINARY_FORMAT[self._ref_size]

        out = self._fp

        # Write file header
        out.write(b'bplist00')

        # Write object list
        for item in self._objlist:
            self._write_object(item)

        # Write refnum->object offset table; the width of its entries
        # depends on where the table starts.
        top_object = self._getrefnum(value)
        table_start = out.tell()
        offset_size = _count_to_size(table_start)
        table_format = '>' + _BINARY_FORMAT[offset_size] * object_count
        out.write(struct.pack(table_format, *self._object_offsets))

        # Write trailer
        sort_version = 0
        out.write(struct.pack(
            '>5xBBBQQQ',
            sort_version, offset_size, self._ref_size, object_count,
            top_object, table_start,
        ))
    796 
    def _flatten(self, value):
        """Register *value* and, recursively, everything it contains.

        Each registered object is appended to ``self._objlist`` and gets a
        reference number (its index there).  Equal hashable scalars share
        one entry; containers and other unhashable values are always
        registered again and are tracked by ``id()``.

        Raises TypeError for a dictionary key that is not a str, unless
        skipkeys is set, in which case the entry is dropped.
        """
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        #
        # bytearray is deliberately absent from this tuple: it is not
        # hashable, so the membership test would raise TypeError.  It takes
        # the id()-based fallback below instead, like other unhashable
        # values.
        if isinstance(value, (
                str, int, float, datetime.datetime, bytes)):
            if (type(value), value) in self._objtable:
                return

        elif isinstance(value, Data):
            if (type(value.data), value.data) in self._objtable:
                return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        try:
            if isinstance(value, Data):
                self._objtable[(type(value.data), value.data)] = refnum
            else:
                self._objtable[(type(value), value)] = refnum
        except TypeError:
            # Unhashable value (dict, list, bytearray, ...): key on identity.
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            keys = []
            values = []
            items = value.items()
            if self._sort_keys:
                items = sorted(items)

            for k, v in items:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keys.append(k)
                values.append(v)

            # All keys are registered before any of the values.
            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)
    843 
    def _getrefnum(self, value):
        """Return the reference number recorded for *value*.

        The by-value table ``self._objtable`` is consulted first, keyed on
        ``(type, value)`` (for a ``Data`` instance, on its wrapped ``data``).
        If building or hashing that key raises TypeError, the lookup falls
        back to ``self._objidtable``, keyed on ``id(value)``.  A KeyError
        from either table propagates to the caller.
        """
        try:
            payload = value.data if isinstance(value, Data) else value
            return self._objtable[(type(payload), payload)]
        except TypeError:
            # Unhashable objects are tracked by identity.
            return self._objidtable[id(value)]
    852 
    853     def _write_size(self, token, size):
    854         if size < 15:
    855             self._fp.write(struct.pack('>B', token | size))
    856 
    857         elif size < 1 << 8:
    858             self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))
    859 
    860         elif size < 1 << 16:
    861             self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))
    862 
    863         elif size < 1 << 32:
    864             self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))
    865 
    866         else:
    867             self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
    868 
    869     def _write_object(self, value):
    870         ref = self._getrefnum(value)
    871         self._object_offsets[ref] = self._fp.tell()
    872         if value is None:
    873             self._fp.write(b'\x00')
    874 
    875         elif value is False:
    876             self._fp.write(b'\x08')
    877 
    878         elif value is True:
    879             self._fp.write(b'\x09')
    880 
    881         elif isinstance(value, int):
    882             if value < 0:
    883                 try:
    884                     self._fp.write(struct.pack('>Bq', 0x13, value))
    885                 except struct.error:
    886                     raise OverflowError(value) from None
    887             elif value < 1 << 8:
    888                 self._fp.write(struct.pack('>BB', 0x10, value))
    889             elif value < 1 << 16:
    890                 self._fp.write(struct.pack('>BH', 0x11, value))
    891             elif value < 1 << 32:
    892                 self._fp.write(struct.pack('>BL', 0x12, value))
    893             elif value < 1 << 63:
    894                 self._fp.write(struct.pack('>BQ', 0x13, value))
    895             elif value < 1 << 64:
    896                 self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
    897             else:
    898                 raise OverflowError(value)
    899 
    900         elif isinstance(value, float):
    901             self._fp.write(struct.pack('>Bd', 0x23, value))
    902 
    903         elif isinstance(value, datetime.datetime):
    904             f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
    905             self._fp.write(struct.pack('>Bd', 0x33, f))
    906 
    907         elif isinstance(value, Data):
    908             self._write_size(0x40, len(value.data))
    909             self._fp.write(value.data)
    910 
    911         elif isinstance(value, (bytes, bytearray)):
    912             self._write_size(0x40, len(value))
    913             self._fp.write(value)
    914 
    915         elif isinstance(value, str):
    916             try:
    917                 t = value.encode('ascii')
    918                 self._write_size(0x50, len(value))
    919             except UnicodeEncodeError:
    920                 t = value.encode('utf-16be')
    921                 self._write_size(0x60, len(t) // 2)
    922 
    923             self._fp.write(t)
    924 
    925         elif isinstance(value, (list, tuple)):
    926             refs = [self._getrefnum(o) for o in value]
    927             s = len(refs)
    928             self._write_size(0xA0, s)
    929             self._fp.write(struct.pack('>' + self._ref_format * s, *refs))
    930 
    931         elif isinstance(value, dict):
    932             keyRefs, valRefs = [], []
    933 
    934             if self._sort_keys:
    935                 rootItems = sorted(value.items())
    936             else:
    937                 rootItems = value.items()
    938 
    939             for k, v in rootItems:
    940                 if not isinstance(k, str):
    941                     if self._skipkeys:
    942                         continue
    943                     raise TypeError("keys must be strings")
    944                 keyRefs.append(self._getrefnum(k))
    945                 valRefs.append(self._getrefnum(v))
    946 
    947             s = len(keyRefs)
    948             self._write_size(0xD0, s)
    949             self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
    950             self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))
    951 
    952         else:
    953             raise TypeError(value)
    954 
    955 
    956 def _is_fmt_binary(header):
    957     return header[:8] == b'bplist00'
    958 
    959 
    960 #
    961 # Generic bits
    962 #
    963 
    964 _FORMATS={
    965     FMT_XML: dict(
    966         detect=_is_fmt_xml,
    967         parser=_PlistParser,
    968         writer=_PlistWriter,
    969     ),
    970     FMT_BINARY: dict(
    971         detect=_is_fmt_binary,
    972         parser=_BinaryPlistParser,
    973         writer=_BinaryPlistWriter,
    974     )
    975 }
    976 
    977 
    978 def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    979     """Read a .plist file. 'fp' should be (readable) file object.
    980     Return the unpacked root object (which usually is a dictionary).
    981     """
    982     if fmt is None:
    983         header = fp.read(32)
    984         fp.seek(0)
    985         for info in _FORMATS.values():
    986             if info['detect'](header):
    987                 P = info['parser']
    988                 break
    989 
    990         else:
    991             raise InvalidFileException()
    992 
    993     else:
    994         P = _FORMATS[fmt]['parser']
    995 
    996     p = P(use_builtin_types=use_builtin_types, dict_type=dict_type)
    997     return p.parse(fp)
    998 
    999 
   1000 def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
   1001     """Read a .plist file from a bytes object.
   1002     Return the unpacked root object (which usually is a dictionary).
   1003     """
   1004     fp = BytesIO(value)
   1005     return load(
   1006         fp, fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)
   1007 
   1008 
   1009 def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
   1010     """Write 'value' to a .plist file. 'fp' should be a (writable)
   1011     file object.
   1012     """
   1013     if fmt not in _FORMATS:
   1014         raise ValueError("Unsupported format: %r"%(fmt,))
   1015 
   1016     writer = _FORMATS[fmt]["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys)
   1017     writer.write(value)
   1018 
   1019 
   1020 def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
   1021     """Return a bytes object with the contents for a .plist file.
   1022     """
   1023     fp = BytesIO()
   1024     dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
   1025     return fp.getvalue()
   1026