Home | History | Annotate | Download | only in Lib
      1 r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
      2 
      3 The property list (.plist) file format is a simple XML pickle supporting
      4 basic object types, like dictionaries, lists, numbers and strings.
      5 Usually the top level object is a dictionary.
      6 
      7 To write out a plist file, use the dump(value, file)
      8 function. 'value' is the top level object, 'file' is
      9 a (writable) file object.
     10 
     11 To parse a plist from a file, use the load(file) function,
     12 with a (readable) file object as the only argument. It
     13 returns the top level object (again, usually a dictionary).
     14 
     15 To work with plist data in bytes objects, you can use loads()
     16 and dumps().
     17 
     18 Values can be strings, integers, floats, booleans, tuples, lists,
     19 dictionaries (but only with string keys), Data, bytes, bytearray, or
     20 datetime.datetime objects.
     21 
     22 Generate Plist example:
     23 
     24     pl = dict(
     25         aString = "Doodah",
     26         aList = ["A", "B", 12, 32.1, [1, 2, 3]],
     27         aFloat = 0.1,
     28         anInt = 728,
     29         aDict = dict(
     30             anotherString = "<hello & hi there!>",
     31             aUnicodeValue = "M\xe4ssig, Ma\xdf",
     32             aTrueValue = True,
     33             aFalseValue = False,
     34         ),
     35         someData = b"<binary gunk>",
     36         someMoreData = b"<lots of binary gunk>" * 10,
     37         aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
     38     )
     39     with open(fileName, 'wb') as fp:
     40         dump(pl, fp)
     41 
     42 Parse Plist example:
     43 
     44     with open(fileName, 'rb') as fp:
     45         pl = load(fp)
     46     print(pl["aKey"])
     47 """
# Names exported by ``from plistlib import *``: the deprecated
# readPlist/writePlist/readPlistFromBytes/writePlistToBytes helpers and the
# Data wrapper, the binary-plist error type, the FMT_XML / FMT_BINARY format
# constants, and the load/dump/loads/dumps functions.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Data", "InvalidFileException", "FMT_XML", "FMT_BINARY",
    "load", "dump", "loads", "dumps"
]
     53 
     54 import binascii
     55 import codecs
     56 import contextlib
     57 import datetime
     58 import enum
     59 from io import BytesIO
     60 import itertools
     61 import os
     62 import re
     63 import struct
     64 from warnings import warn
     65 from xml.parsers.expat import ParserCreate
     66 
     67 
# Enumeration with the two members FMT_XML and FMT_BINARY.  The members are
# also copied into the module namespace so that they can be referenced as
# plain module-level names (FMT_XML, FMT_BINARY).
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
     70 
     71 
     72 #
     73 #
     74 # Deprecated functionality
     75 #
     76 #
     77 
     78 
     79 @contextlib.contextmanager
     80 def _maybe_open(pathOrFile, mode):
     81     if isinstance(pathOrFile, str):
     82         with open(pathOrFile, mode) as fp:
     83             yield fp
     84 
     85     else:
     86         yield pathOrFile
     87 
     88 
     89 def readPlist(pathOrFile):
     90     """
     91     Read a .plist from a path or file. pathOrFile should either
     92     be a file name, or a readable binary file object.
     93 
     94     This function is deprecated, use load instead.
     95     """
     96     warn("The readPlist function is deprecated, use load() instead",
     97         DeprecationWarning, 2)
     98 
     99     with _maybe_open(pathOrFile, 'rb') as fp:
    100         return load(fp, fmt=None, use_builtin_types=False)
    101 
    102 def writePlist(value, pathOrFile):
    103     """
    104     Write 'value' to a .plist file. 'pathOrFile' may either be a
    105     file name or a (writable) file object.
    106 
    107     This function is deprecated, use dump instead.
    108     """
    109     warn("The writePlist function is deprecated, use dump() instead",
    110         DeprecationWarning, 2)
    111     with _maybe_open(pathOrFile, 'wb') as fp:
    112         dump(value, fp, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    113 
    114 
    115 def readPlistFromBytes(data):
    116     """
    117     Read a plist data from a bytes object. Return the root object.
    118 
    119     This function is deprecated, use loads instead.
    120     """
    121     warn("The readPlistFromBytes function is deprecated, use loads() instead",
    122         DeprecationWarning, 2)
    123     return load(BytesIO(data), fmt=None, use_builtin_types=False)
    124 
    125 
    126 def writePlistToBytes(value):
    127     """
    128     Return 'value' as a plist-formatted bytes object.
    129 
    130     This function is deprecated, use dumps instead.
    131     """
    132     warn("The writePlistToBytes function is deprecated, use dumps() instead",
    133         DeprecationWarning, 2)
    134     f = BytesIO()
    135     dump(value, f, fmt=FMT_XML, sort_keys=True, skipkeys=False)
    136     return f.getvalue()
    137 
    138 
    139 class Data:
    140     """
    141     Wrapper for binary data.
    142 
    143     This class is deprecated, use a bytes object instead.
    144     """
    145 
    146     def __init__(self, data):
    147         if not isinstance(data, bytes):
    148             raise TypeError("data must be as bytes")
    149         self.data = data
    150 
    151     @classmethod
    152     def fromBase64(cls, data):
    153         # base64.decodebytes just calls binascii.a2b_base64;
    154         # it seems overkill to use both base64 and binascii.
    155         return cls(_decode_base64(data))
    156 
    157     def asBase64(self, maxlinelength=76):
    158         return _encode_base64(self.data, maxlinelength)
    159 
    160     def __eq__(self, other):
    161         if isinstance(other, self.__class__):
    162             return self.data == other.data
    163         elif isinstance(other, bytes):
    164             return self.data == other
    165         else:
    166             return NotImplemented
    167 
    168     def __repr__(self):
    169         return "%s(%s)" % (self.__class__.__name__, repr(self.data))
    170 
    171 #
    172 #
    173 # End of deprecated functionality
    174 #
    175 #
    176 
    177 
    178 #
    179 # XML support
    180 #
    181 
    182 
# XML 'header': the XML declaration followed by Apple's plist DOCTYPE.
# _PlistWriter writes these bytes to the output file first when its
# writeHeader argument is true.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""


# Regex to find any control chars (\x00-\x1f), except for \t (\x09),
# \n (\x0a) and \r (\x0d).  _escape() uses it to reject strings that
# contain such characters.
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
    194 
    195 def _encode_base64(s, maxlinelength=76):
    196     # copied from base64.encodebytes(), with added maxlinelength argument
    197     maxbinsize = (maxlinelength//4)*3
    198     pieces = []
    199     for i in range(0, len(s), maxbinsize):
    200         chunk = s[i : i + maxbinsize]
    201         pieces.append(binascii.b2a_base64(chunk))
    202     return b''.join(pieces)
    203 
    204 def _decode_base64(s):
    205     if isinstance(s, str):
    206         return binascii.a2b_base64(s.encode("utf-8"))
    207 
    208     else:
    209         return binascii.a2b_base64(s)
    210 
    211 # Contents should conform to a subset of ISO 8601
    212 # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units
    213 # may be omitted with #  a loss of precision)
    214 _dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
    215 
    216 
    217 def _date_from_string(s):
    218     order = ('year', 'month', 'day', 'hour', 'minute', 'second')
    219     gd = _dateParser.match(s).groupdict()
    220     lst = []
    221     for key in order:
    222         val = gd[key]
    223         if val is None:
    224             break
    225         lst.append(int(val))
    226     return datetime.datetime(*lst)
    227 
    228 
    229 def _date_to_string(d):
    230     return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
    231         d.year, d.month, d.day,
    232         d.hour, d.minute, d.second
    233     )
    234 
    235 def _escape(text):
    236     m = _controlCharPat.search(text)
    237     if m is not None:
    238         raise ValueError("strings can't contains control characters; "
    239                          "use bytes instead")
    240     text = text.replace("\r\n", "\n")       # convert DOS line endings
    241     text = text.replace("\r", "\n")         # convert Mac line endings
    242     text = text.replace("&", "&amp;")       # escape '&'
    243     text = text.replace("<", "&lt;")        # escape '<'
    244     text = text.replace(">", "&gt;")        # escape '>'
    245     return text
    246 
    247 class _PlistParser:
    248     def __init__(self, use_builtin_types, dict_type):
    249         self.stack = []
    250         self.current_key = None
    251         self.root = None
    252         self._use_builtin_types = use_builtin_types
    253         self._dict_type = dict_type
    254 
    255     def parse(self, fileobj):
    256         self.parser = ParserCreate()
    257         self.parser.StartElementHandler = self.handle_begin_element
    258         self.parser.EndElementHandler = self.handle_end_element
    259         self.parser.CharacterDataHandler = self.handle_data
    260         self.parser.ParseFile(fileobj)
    261         return self.root
    262 
    263     def handle_begin_element(self, element, attrs):
    264         self.data = []
    265         handler = getattr(self, "begin_" + element, None)
    266         if handler is not None:
    267             handler(attrs)
    268 
    269     def handle_end_element(self, element):
    270         handler = getattr(self, "end_" + element, None)
    271         if handler is not None:
    272             handler()
    273 
    274     def handle_data(self, data):
    275         self.data.append(data)
    276 
    277     def add_object(self, value):
    278         if self.current_key is not None:
    279             if not isinstance(self.stack[-1], type({})):
    280                 raise ValueError("unexpected element at line %d" %
    281                                  self.parser.CurrentLineNumber)
    282             self.stack[-1][self.current_key] = value
    283             self.current_key = None
    284         elif not self.stack:
    285             # this is the root object
    286             self.root = value
    287         else:
    288             if not isinstance(self.stack[-1], type([])):
    289                 raise ValueError("unexpected element at line %d" %
    290                                  self.parser.CurrentLineNumber)
    291             self.stack[-1].append(value)
    292 
    293     def get_data(self):
    294         data = ''.join(self.data)
    295         self.data = []
    296         return data
    297 
    298     # element handlers
    299 
    300     def begin_dict(self, attrs):
    301         d = self._dict_type()
    302         self.add_object(d)
    303         self.stack.append(d)
    304 
    305     def end_dict(self):
    306         if self.current_key:
    307             raise ValueError("missing value for key '%s' at line %d" %
    308                              (self.current_key,self.parser.CurrentLineNumber))
    309         self.stack.pop()
    310 
    311     def end_key(self):
    312         if self.current_key or not isinstance(self.stack[-1], type({})):
    313             raise ValueError("unexpected key at line %d" %
    314                              self.parser.CurrentLineNumber)
    315         self.current_key = self.get_data()
    316 
    317     def begin_array(self, attrs):
    318         a = []
    319         self.add_object(a)
    320         self.stack.append(a)
    321 
    322     def end_array(self):
    323         self.stack.pop()
    324 
    325     def end_true(self):
    326         self.add_object(True)
    327 
    328     def end_false(self):
    329         self.add_object(False)
    330 
    331     def end_integer(self):
    332         self.add_object(int(self.get_data()))
    333 
    334     def end_real(self):
    335         self.add_object(float(self.get_data()))
    336 
    337     def end_string(self):
    338         self.add_object(self.get_data())
    339 
    340     def end_data(self):
    341         if self._use_builtin_types:
    342             self.add_object(_decode_base64(self.get_data()))
    343 
    344         else:
    345             self.add_object(Data.fromBase64(self.get_data()))
    346 
    347     def end_date(self):
    348         self.add_object(_date_from_string(self.get_data()))
    349 
    350 
    351 class _DumbXMLWriter:
    352     def __init__(self, file, indent_level=0, indent="\t"):
    353         self.file = file
    354         self.stack = []
    355         self._indent_level = indent_level
    356         self.indent = indent
    357 
    358     def begin_element(self, element):
    359         self.stack.append(element)
    360         self.writeln("<%s>" % element)
    361         self._indent_level += 1
    362 
    363     def end_element(self, element):
    364         assert self._indent_level > 0
    365         assert self.stack.pop() == element
    366         self._indent_level -= 1
    367         self.writeln("</%s>" % element)
    368 
    369     def simple_element(self, element, value=None):
    370         if value is not None:
    371             value = _escape(value)
    372             self.writeln("<%s>%s</%s>" % (element, value, element))
    373 
    374         else:
    375             self.writeln("<%s/>" % element)
    376 
    377     def writeln(self, line):
    378         if line:
    379             # plist has fixed encoding of utf-8
    380 
    381             # XXX: is this test needed?
    382             if isinstance(line, str):
    383                 line = line.encode('utf-8')
    384             self.file.write(self._indent_level * self.indent)
    385             self.file.write(line)
    386         self.file.write(b'\n')
    387 
    388 
    389 class _PlistWriter(_DumbXMLWriter):
    390     def __init__(
    391             self, file, indent_level=0, indent=b"\t", writeHeader=1,
    392             sort_keys=True, skipkeys=False):
    393 
    394         if writeHeader:
    395             file.write(PLISTHEADER)
    396         _DumbXMLWriter.__init__(self, file, indent_level, indent)
    397         self._sort_keys = sort_keys
    398         self._skipkeys = skipkeys
    399 
    400     def write(self, value):
    401         self.writeln("<plist version=\"1.0\">")
    402         self.write_value(value)
    403         self.writeln("</plist>")
    404 
    405     def write_value(self, value):
    406         if isinstance(value, str):
    407             self.simple_element("string", value)
    408 
    409         elif value is True:
    410             self.simple_element("true")
    411 
    412         elif value is False:
    413             self.simple_element("false")
    414 
    415         elif isinstance(value, int):
    416             if -1 << 63 <= value < 1 << 64:
    417                 self.simple_element("integer", "%d" % value)
    418             else:
    419                 raise OverflowError(value)
    420 
    421         elif isinstance(value, float):
    422             self.simple_element("real", repr(value))
    423 
    424         elif isinstance(value, dict):
    425             self.write_dict(value)
    426 
    427         elif isinstance(value, Data):
    428             self.write_data(value)
    429 
    430         elif isinstance(value, (bytes, bytearray)):
    431             self.write_bytes(value)
    432 
    433         elif isinstance(value, datetime.datetime):
    434             self.simple_element("date", _date_to_string(value))
    435 
    436         elif isinstance(value, (tuple, list)):
    437             self.write_array(value)
    438 
    439         else:
    440             raise TypeError("unsupported type: %s" % type(value))
    441 
    442     def write_data(self, data):
    443         self.write_bytes(data.data)
    444 
    445     def write_bytes(self, data):
    446         self.begin_element("data")
    447         self._indent_level -= 1
    448         maxlinelength = max(
    449             16,
    450             76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level))
    451 
    452         for line in _encode_base64(data, maxlinelength).split(b"\n"):
    453             if line:
    454                 self.writeln(line)
    455         self._indent_level += 1
    456         self.end_element("data")
    457 
    458     def write_dict(self, d):
    459         if d:
    460             self.begin_element("dict")
    461             if self._sort_keys:
    462                 items = sorted(d.items())
    463             else:
    464                 items = d.items()
    465 
    466             for key, value in items:
    467                 if not isinstance(key, str):
    468                     if self._skipkeys:
    469                         continue
    470                     raise TypeError("keys must be strings")
    471                 self.simple_element("key", key)
    472                 self.write_value(value)
    473             self.end_element("dict")
    474 
    475         else:
    476             self.simple_element("dict")
    477 
    478     def write_array(self, array):
    479         if array:
    480             self.begin_element("array")
    481             for value in array:
    482                 self.write_value(value)
    483             self.end_element("array")
    484 
    485         else:
    486             self.simple_element("array")
    487 
    488 
    489 def _is_fmt_xml(header):
    490     prefixes = (b'<?xml', b'<plist')
    491 
    492     for pfx in prefixes:
    493         if header.startswith(pfx):
    494             return True
    495 
    496     # Also check for alternative XML encodings, this is slightly
    497     # overkill because the Apple tools (and plistlib) will not
    498     # generate files with these encodings.
    499     for bom, encoding in (
    500                 (codecs.BOM_UTF8, "utf-8"),
    501                 (codecs.BOM_UTF16_BE, "utf-16-be"),
    502                 (codecs.BOM_UTF16_LE, "utf-16-le"),
    503                 # expat does not support utf-32
    504                 #(codecs.BOM_UTF32_BE, "utf-32-be"),
    505                 #(codecs.BOM_UTF32_LE, "utf-32-le"),
    506             ):
    507         if not header.startswith(bom):
    508             continue
    509 
    510         for start in prefixes:
    511             prefix = bom + start.decode('ascii').encode(encoding)
    512             if header[:len(prefix)] == prefix:
    513                 return True
    514 
    515     return False
    516 
    517 #
    518 # Binary Plist
    519 #
    520 
    521 
    522 class InvalidFileException (ValueError):
    523     def __init__(self, message="Invalid file"):
    524         ValueError.__init__(self, message)
    525 
# Maps an unsigned integer width in bytes to the matching struct format
# character; used together with a '>' (big-endian) prefix.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

# Sentinel marking objects that the binary parser has not read yet
# (None cannot be used: it is a valid parsed value).
_undefined = object()
    529 
class _BinaryPlistParser:
    """
    Read a binary plist file, following the description of the binary
    format.  Raise InvalidFileException in case of error, otherwise return the
    root object.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """
    def __init__(self, use_builtin_types, dict_type):
        # True: data objects are returned as bytes; False: wrapped in Data.
        self._use_builtin_types = use_builtin_types
        # Callable used to create the mapping for every dict object.
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the binary plist in the seekable binary file *fp*.

        Returns the top-level object.  OSError, IndexError, struct.error,
        OverflowError and UnicodeDecodeError raised while reading are
        converted into InvalidFileException.
        """
        try:
            # The basic file format:
            # HEADER
            # object...
            # refid->offset...
            # TRAILER
            self._fp = fp
            # The trailer is the last 32 bytes of the file.
            self._fp.seek(-32, os.SEEK_END)
            trailer = self._fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            # 6 ignored bytes, two 1-byte sizes, then three 8-byte integers.
            (
                offset_size, self._ref_size, num_objects, top_object,
                offset_table_offset
            ) = struct.unpack('>6xBBQQQ', trailer)
            self._fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            # Cache of already parsed objects, indexed by object reference.
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)

        except (OSError, IndexError, struct.error, OverflowError,
                UnicodeDecodeError):
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """ return the size of the next object."""
        # A low nibble of 0xF means the real size follows as an integer
        # whose width (1, 2, 4 or 8 bytes) is given by the low two bits of
        # the next byte.
        if tokenL == 0xF:
            m = self._fp.read(1)[0] & 0x3
            s = 1 << m
            f = '>' + _BINARY_FORMAT[s]
            return struct.unpack(f, self._fp.read(s))[0]

        return tokenL

    def _read_ints(self, n, size):
        """Read *n* big-endian unsigned integers of *size* bytes each.

        Returns them as a tuple.  Sizes without a struct format code are
        decoded with int.from_bytes(); a zero size or short read raises
        InvalidFileException in that case.
        """
        data = self._fp.read(size * n)
        if size in _BINARY_FORMAT:
            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
        else:
            if not size or len(data) != size * n:
                raise InvalidFileException()
            return tuple(int.from_bytes(data[i: i + size], 'big')
                         for i in range(0, size * n, size))

    def _read_refs(self, n):
        """Read *n* object references (each self._ref_size bytes wide)."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        read the object by reference.

        May recursively read sub-objects (content of an array/dict/set)
        """
        # Return the cached object if this reference was parsed before.
        result = self._objects[ref]
        if result is not _undefined:
            return result

        offset = self._object_offsets[ref]
        self._fp.seek(offset)
        # The first byte is a type marker: high nibble = type, low nibble =
        # size information (or the value itself for the simple constants).
        token = self._fp.read(1)[0]
        tokenH, tokenL = token & 0xF0, token & 0x0F

        if token == 0x00:
            result = None

        elif token == 0x08:
            result = False

        elif token == 0x09:
            result = True

        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.

        elif token == 0x0f:
            result = b''

        elif tokenH == 0x10:  # int
            # 2**tokenL bytes, big-endian; read as signed from 8 bytes up.
            result = int.from_bytes(self._fp.read(1 << tokenL),
                                    'big', signed=tokenL >= 3)

        elif token == 0x22: # real
            # 4-byte big-endian float
            result = struct.unpack('>f', self._fp.read(4))[0]

        elif token == 0x23: # real
            # 8-byte big-endian double
            result = struct.unpack('>d', self._fp.read(8))[0]

        elif token == 0x33:  # date
            f = struct.unpack('>d', self._fp.read(8))[0]
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            result = (datetime.datetime(2001, 1, 1) +
                      datetime.timedelta(seconds=f))

        elif tokenH == 0x40:  # data
            s = self._get_size(tokenL)
            if self._use_builtin_types:
                result = self._fp.read(s)
            else:
                result = Data(self._fp.read(s))

        elif tokenH == 0x50:  # ascii string
            s = self._get_size(tokenL)
            result =  self._fp.read(s).decode('ascii')
            result = result

        elif tokenH == 0x60:  # unicode string
            # the size counts 2-byte UTF-16 code units, not bytes
            s = self._get_size(tokenL)
            result = self._fp.read(s * 2).decode('utf-16be')

        # tokenH == 0x80 is documented as 'UID' and appears to be used for
        # keyed-archiving, not in plists.

        elif tokenH == 0xA0:  # array
            s = self._get_size(tokenL)
            obj_refs = self._read_refs(s)
            result = []
            # Register the list before reading its items, so that a reference
            # back to this array resolves to the same list object.
            self._objects[ref] = result
            result.extend(self._read_object(x) for x in obj_refs)

        # tokenH == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # tokenH == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif tokenH == 0xD0:  # dict
            s = self._get_size(tokenL)
            # all key references come first, followed by all value references
            key_refs = self._read_refs(s)
            obj_refs = self._read_refs(s)
            result = self._dict_type()
            # Registered before reading the items, as for arrays above.
            self._objects[ref] = result
            for k, o in zip(key_refs, obj_refs):
                result[self._read_object(k)] = self._read_object(o)

        else:
            raise InvalidFileException()

        self._objects[ref] = result
        return result
    683 
    684 def _count_to_size(count):
    685     if count < 1 << 8:
    686         return 1
    687 
    688     elif count < 1 << 16:
    689         return 2
    690 
    691     elif count << 1 << 32:
    692         return 4
    693 
    694     else:
    695         return 8
    696 
# Types that the binary writer de-duplicates by value: instances are looked
# up in its object table under the key (type(value), value).  Everything
# else (apart from Data, which is keyed on its payload) is tracked by id().
_scalars = (str, int, float, datetime.datetime, bytes)
    698 
    699 class _BinaryPlistWriter (object):
    700     def __init__(self, fp, sort_keys, skipkeys):
    701         self._fp = fp
    702         self._sort_keys = sort_keys
    703         self._skipkeys = skipkeys
    704 
    def write(self, value):
        """Serialize *value* to self._fp as a binary plist.

        Layout written, in order: the b'bplist00' header, every object of
        the flattened object list, the table of object offsets, and the
        32-byte trailer.
        """

        # Flattened object list:
        self._objlist = []

        # Mappings from object->objectid
        # First dict has (type(object), object) as the key,
        # second dict is used when object is not hashable and
        # has id(object) as the key.
        self._objtable = {}
        self._objidtable = {}

        # Create list of all objects in the plist
        self._flatten(value)

        # Size of object references in serialized containers
        # depends on the number of objects in the plist.
        num_objects = len(self._objlist)
        self._object_offsets = [0]*num_objects
        self._ref_size = _count_to_size(num_objects)

        self._ref_format = _BINARY_FORMAT[self._ref_size]

        # Write file header
        self._fp.write(b'bplist00')

        # Write object list
        # (_write_object records each object's offset in _object_offsets)
        for obj in self._objlist:
            self._write_object(obj)

        # Write refnum->object offset table
        top_object = self._getrefnum(value)
        offset_table_offset = self._fp.tell()
        # Every object offset is smaller than the offset of the table itself,
        # so the table offset determines the width of the entries.
        offset_size = _count_to_size(offset_table_offset)
        offset_format = '>' + _BINARY_FORMAT[offset_size] * num_objects
        self._fp.write(struct.pack(offset_format, *self._object_offsets))

        # Write trailer
        # (5 pad bytes, three 1-byte fields, three 8-byte fields = 32 bytes)
        sort_version = 0
        trailer = (
            sort_version, offset_size, self._ref_size, num_objects,
            top_object, offset_table_offset
        )
        self._fp.write(struct.pack('>5xBBBQQQ', *trailer))
    749 
    def _flatten(self, value):
        """Append *value* and, recursively, its contents to self._objlist.

        Each object gets its index in the list as reference number, which is
        recorded in self._objtable (scalars and Data, keyed by type and
        value) or self._objidtable (everything else, keyed by id()).
        Raises TypeError for a non-str dict key unless skipkeys is set.
        """
        # First check if the object is in the object table, not used for
        # containers to ensure that two subcontainers with the same contents
        # will be serialized as distinct values.
        if isinstance(value, _scalars):
            if (type(value), value) in self._objtable:
                return

        elif isinstance(value, Data):
            # Data shares the table entry of its wrapped bytes value.
            if (type(value.data), value.data) in self._objtable:
                return

        elif id(value) in self._objidtable:
            return

        # Add to objectreference map
        refnum = len(self._objlist)
        self._objlist.append(value)
        if isinstance(value, _scalars):
            self._objtable[(type(value), value)] = refnum
        elif isinstance(value, Data):
            self._objtable[(type(value.data), value.data)] = refnum
        else:
            self._objidtable[id(value)] = refnum

        # And finally recurse into containers
        if isinstance(value, dict):
            keys = []
            values = []
            items = value.items()
            if self._sort_keys:
                # NOTE(review): the items are sorted before the key type
                # check below, so keys of mixed types can make sorted()
                # raise TypeError even when skipkeys is set -- confirm
                # whether that is intended.
                items = sorted(items)

            for k, v in items:
                if not isinstance(k, str):
                    if self._skipkeys:
                        continue
                    raise TypeError("keys must be strings")
                keys.append(k)
                values.append(v)

            # All keys are flattened first, followed by all values.
            for o in itertools.chain(keys, values):
                self._flatten(o)

        elif isinstance(value, (list, tuple)):
            for o in value:
                self._flatten(o)
    797 
    def _getrefnum(self, value):
        """Return the reference number recorded for *value* by _flatten().

        Scalars and Data payloads are looked up by (type, value); all other
        objects by id().  Raises KeyError for an object that was not
        flattened.
        """
        if isinstance(value, _scalars):
            key = (type(value), value)
        elif isinstance(value, Data):
            payload = value.data
            key = (type(payload), payload)
        else:
            return self._objidtable[id(value)]
        return self._objtable[key]
    805 
    806     def _write_size(self, token, size):
    807         if size < 15:
    808             self._fp.write(struct.pack('>B', token | size))
    809 
    810         elif size < 1 << 8:
    811             self._fp.write(struct.pack('>BBB', token | 0xF, 0x10, size))
    812 
    813         elif size < 1 << 16:
    814             self._fp.write(struct.pack('>BBH', token | 0xF, 0x11, size))
    815 
    816         elif size < 1 << 32:
    817             self._fp.write(struct.pack('>BBL', token | 0xF, 0x12, size))
    818 
    819         else:
    820             self._fp.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
    821 
    822     def _write_object(self, value):
    823         ref = self._getrefnum(value)
    824         self._object_offsets[ref] = self._fp.tell()
    825         if value is None:
    826             self._fp.write(b'\x00')
    827 
    828         elif value is False:
    829             self._fp.write(b'\x08')
    830 
    831         elif value is True:
    832             self._fp.write(b'\x09')
    833 
    834         elif isinstance(value, int):
    835             if value < 0:
    836                 try:
    837                     self._fp.write(struct.pack('>Bq', 0x13, value))
    838                 except struct.error:
    839                     raise OverflowError(value) from None
    840             elif value < 1 << 8:
    841                 self._fp.write(struct.pack('>BB', 0x10, value))
    842             elif value < 1 << 16:
    843                 self._fp.write(struct.pack('>BH', 0x11, value))
    844             elif value < 1 << 32:
    845                 self._fp.write(struct.pack('>BL', 0x12, value))
    846             elif value < 1 << 63:
    847                 self._fp.write(struct.pack('>BQ', 0x13, value))
    848             elif value < 1 << 64:
    849                 self._fp.write(b'\x14' + value.to_bytes(16, 'big', signed=True))
    850             else:
    851                 raise OverflowError(value)
    852 
    853         elif isinstance(value, float):
    854             self._fp.write(struct.pack('>Bd', 0x23, value))
    855 
    856         elif isinstance(value, datetime.datetime):
    857             f = (value - datetime.datetime(2001, 1, 1)).total_seconds()
    858             self._fp.write(struct.pack('>Bd', 0x33, f))
    859 
    860         elif isinstance(value, Data):
    861             self._write_size(0x40, len(value.data))
    862             self._fp.write(value.data)
    863 
    864         elif isinstance(value, (bytes, bytearray)):
    865             self._write_size(0x40, len(value))
    866             self._fp.write(value)
    867 
    868         elif isinstance(value, str):
    869             try:
    870                 t = value.encode('ascii')
    871                 self._write_size(0x50, len(value))
    872             except UnicodeEncodeError:
    873                 t = value.encode('utf-16be')
    874                 self._write_size(0x60, len(t) // 2)
    875 
    876             self._fp.write(t)
    877 
    878         elif isinstance(value, (list, tuple)):
    879             refs = [self._getrefnum(o) for o in value]
    880             s = len(refs)
    881             self._write_size(0xA0, s)
    882             self._fp.write(struct.pack('>' + self._ref_format * s, *refs))
    883 
    884         elif isinstance(value, dict):
    885             keyRefs, valRefs = [], []
    886 
    887             if self._sort_keys:
    888                 rootItems = sorted(value.items())
    889             else:
    890                 rootItems = value.items()
    891 
    892             for k, v in rootItems:
    893                 if not isinstance(k, str):
    894                     if self._skipkeys:
    895                         continue
    896                     raise TypeError("keys must be strings")
    897                 keyRefs.append(self._getrefnum(k))
    898                 valRefs.append(self._getrefnum(v))
    899 
    900             s = len(keyRefs)
    901             self._write_size(0xD0, s)
    902             self._fp.write(struct.pack('>' + self._ref_format * s, *keyRefs))
    903             self._fp.write(struct.pack('>' + self._ref_format * s, *valRefs))
    904 
    905         else:
    906             raise TypeError(value)
    907 
    908 
    909 def _is_fmt_binary(header):
    910     return header[:8] == b'bplist00'
    911 
    912 
    913 #
    914 # Generic bits
    915 #
    916 
    917 _FORMATS={
    918     FMT_XML: dict(
    919         detect=_is_fmt_xml,
    920         parser=_PlistParser,
    921         writer=_PlistWriter,
    922     ),
    923     FMT_BINARY: dict(
    924         detect=_is_fmt_binary,
    925         parser=_BinaryPlistParser,
    926         writer=_BinaryPlistWriter,
    927     )
    928 }
    929 
    930 
    931 def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    932     """Read a .plist file. 'fp' should be (readable) file object.
    933     Return the unpacked root object (which usually is a dictionary).
    934     """
    935     if fmt is None:
    936         header = fp.read(32)
    937         fp.seek(0)
    938         for info in _FORMATS.values():
    939             if info['detect'](header):
    940                 P = info['parser']
    941                 break
    942 
    943         else:
    944             raise InvalidFileException()
    945 
    946     else:
    947         P = _FORMATS[fmt]['parser']
    948 
    949     p = P(use_builtin_types=use_builtin_types, dict_type=dict_type)
    950     return p.parse(fp)
    951 
    952 
    953 def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    954     """Read a .plist file from a bytes object.
    955     Return the unpacked root object (which usually is a dictionary).
    956     """
    957     fp = BytesIO(value)
    958     return load(
    959         fp, fmt=fmt, use_builtin_types=use_builtin_types, dict_type=dict_type)
    960 
    961 
    962 def dump(value, fp, *, fmt=FMT_XML, sort_keys=True, skipkeys=False):
    963     """Write 'value' to a .plist file. 'fp' should be a (writable)
    964     file object.
    965     """
    966     if fmt not in _FORMATS:
    967         raise ValueError("Unsupported format: %r"%(fmt,))
    968 
    969     writer = _FORMATS[fmt]["writer"](fp, sort_keys=sort_keys, skipkeys=skipkeys)
    970     writer.write(value)
    971 
    972 
    973 def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True):
    974     """Return a bytes object with the contents for a .plist file.
    975     """
    976     fp = BytesIO()
    977     dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys)
    978     return fp.getvalue()
    979