Home | History | Annotate | Download | only in Lib
      1 """Create portable serialized representations of Python objects.
      2 
      3 See module copyreg for a mechanism for registering custom picklers.
      4 See module pickletools source for extensive comments.
      5 
      6 Classes:
      7 
      8     Pickler
      9     Unpickler
     10 
     11 Functions:
     12 
     13     dump(object, file)
     14     dumps(object) -> string
     15     load(file) -> object
     16     loads(string) -> object
     17 
     18 Misc variables:
     19 
     20     __version__
     21     format_version
     22     compatible_formats
     23 
     24 """
     25 
     26 from types import FunctionType
     27 from copyreg import dispatch_table
     28 from copyreg import _extension_registry, _inverted_registry, _extension_cache
     29 from itertools import islice
     30 from functools import partial
     31 import sys
     32 from sys import maxsize
     33 from struct import pack, unpack
     34 import re
     35 import io
     36 import codecs
     37 import _compat_pickle
     38 
     39 __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
     40            "Unpickler", "dump", "dumps", "load", "loads"]
     41 
     42 # Shortcut for use in isinstance testing
     43 bytes_types = (bytes, bytearray)
     44 
     45 # These are purely informational; no code uses these.
     46 format_version = "4.0"                  # File format version we write
     47 compatible_formats = ["1.0",            # Original protocol 0
     48                       "1.1",            # Protocol 0 with INST added
     49                       "1.2",            # Original protocol 1
     50                       "1.3",            # Protocol 1 with BINFLOAT added
     51                       "2.0",            # Protocol 2
     52                       "3.0",            # Protocol 3
     53                       "4.0",            # Protocol 4
     54                       ]                 # Old format versions we can read
     55 
     56 # This is the highest protocol number we know how to read.
     57 HIGHEST_PROTOCOL = 4
     58 
     59 # The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
     60 # We intentionally write a protocol that Python 2.x cannot read;
     61 # there are too many issues with that.
     62 DEFAULT_PROTOCOL = 3
     63 
     64 class PickleError(Exception):
     65     """A common base class for the other pickling exceptions."""
     66     pass
     67 
     68 class PicklingError(PickleError):
     69     """This exception is raised when an unpicklable object is passed to the
     70     dump() method.
     71 
     72     """
     73     pass
     74 
     75 class UnpicklingError(PickleError):
     76     """This exception is raised when there is a problem unpickling an object,
     77     such as a security violation.
     78 
     79     Note that other exceptions may also be raised during unpickling, including
     80     (but not necessarily limited to) AttributeError, EOFError, ImportError,
     81     and IndexError.
     82 
     83     """
     84     pass
     85 
     86 # An instance of _Stop is raised by Unpickler.load_stop() in response to
     87 # the STOP opcode, passing the object that is the result of unpickling.
     88 class _Stop(Exception):
     89     def __init__(self, value):
     90         self.value = value
     91 
     92 # Jython has PyStringMap; it's a dict subclass with string keys
     93 try:
     94     from org.python.core import PyStringMap
     95 except ImportError:
     96     PyStringMap = None
     97 
     98 # Pickle opcodes.  See pickletools.py for extensive docs.  The listing
     99 # here is in kind-of alphabetical order of 1-character pickle code.
    100 # pickletools groups them by purpose.
    101 
    102 MARK           = b'('   # push special markobject on stack
    103 STOP           = b'.'   # every pickle ends with STOP
    104 POP            = b'0'   # discard topmost stack item
    105 POP_MARK       = b'1'   # discard stack top through topmost markobject
    106 DUP            = b'2'   # duplicate top stack item
    107 FLOAT          = b'F'   # push float object; decimal string argument
    108 INT            = b'I'   # push integer or bool; decimal string argument
    109 BININT         = b'J'   # push four-byte signed int
    110 BININT1        = b'K'   # push 1-byte unsigned int
    111 LONG           = b'L'   # push long; decimal string argument
    112 BININT2        = b'M'   # push 2-byte unsigned int
    113 NONE           = b'N'   # push None
    114 PERSID         = b'P'   # push persistent object; id is taken from string arg
    115 BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
    116 REDUCE         = b'R'   # apply callable to argtuple, both on stack
    117 STRING         = b'S'   # push string; NL-terminated string argument
    118 BINSTRING      = b'T'   # push string; counted binary string argument
    119 SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
    120 UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
    121 BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
    122 APPEND         = b'a'   # append stack top to list below it
    123 BUILD          = b'b'   # call __setstate__ or __dict__.update()
    124 GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
    125 DICT           = b'd'   # build a dict from stack items
    126 EMPTY_DICT     = b'}'   # push empty dict
    127 APPENDS        = b'e'   # extend list on stack by topmost stack slice
    128 GET            = b'g'   # push item from memo on stack; index is string arg
    129 BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
    130 INST           = b'i'   # build & push class instance
    131 LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
    132 LIST           = b'l'   # build list from topmost stack items
    133 EMPTY_LIST     = b']'   # push empty list
    134 OBJ            = b'o'   # build & push class instance
    135 PUT            = b'p'   # store stack top in memo; index is string arg
    136 BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
    137 LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
    138 SETITEM        = b's'   # add key+value pair to dict
    139 TUPLE          = b't'   # build tuple from topmost stack items
    140 EMPTY_TUPLE    = b')'   # push empty tuple
    141 SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
    142 BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding
    143 
    144 TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
    145 FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py
    146 
    147 # Protocol 2
    148 
    149 PROTO          = b'\x80'  # identify pickle protocol
    150 NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
    151 EXT1           = b'\x82'  # push object from extension registry; 1-byte index
    152 EXT2           = b'\x83'  # ditto, but 2-byte index
    153 EXT4           = b'\x84'  # ditto, but 4-byte index
    154 TUPLE1         = b'\x85'  # build 1-tuple from stack top
    155 TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
    156 TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
    157 NEWTRUE        = b'\x88'  # push True
    158 NEWFALSE       = b'\x89'  # push False
    159 LONG1          = b'\x8a'  # push long from < 256 bytes
    160 LONG4          = b'\x8b'  # push really big long
    161 
    162 _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
    163 
    164 # Protocol 3 (Python 3.x)
    165 
    166 BINBYTES       = b'B'   # push bytes; counted binary string argument
    167 SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes
    168 
    169 # Protocol 4
    170 SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
    171 BINUNICODE8      = b'\x8d'  # push very long string
    172 BINBYTES8        = b'\x8e'  # push very long bytes string
    173 EMPTY_SET        = b'\x8f'  # push empty set on the stack
    174 ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
    175 FROZENSET        = b'\x91'  # build frozenset from topmost stack items
    176 NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
    177 STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
    178 MEMOIZE          = b'\x94'  # store top of the stack in memo
    179 FRAME            = b'\x95'  # indicate the beginning of a new frame
    180 
    181 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
    182 
    183 
    184 class _Framer:
    185 
    186     _FRAME_SIZE_TARGET = 64 * 1024
    187 
    188     def __init__(self, file_write):
    189         self.file_write = file_write
    190         self.current_frame = None
    191 
    192     def start_framing(self):
    193         self.current_frame = io.BytesIO()
    194 
    195     def end_framing(self):
    196         if self.current_frame and self.current_frame.tell() > 0:
    197             self.commit_frame(force=True)
    198             self.current_frame = None
    199 
    200     def commit_frame(self, force=False):
    201         if self.current_frame:
    202             f = self.current_frame
    203             if f.tell() >= self._FRAME_SIZE_TARGET or force:
    204                 with f.getbuffer() as data:
    205                     n = len(data)
    206                     write = self.file_write
    207                     write(FRAME)
    208                     write(pack("<Q", n))
    209                     write(data)
    210                 f.seek(0)
    211                 f.truncate()
    212 
    213     def write(self, data):
    214         if self.current_frame:
    215             return self.current_frame.write(data)
    216         else:
    217             return self.file_write(data)
    218 
    219 
    220 class _Unframer:
    221 
    222     def __init__(self, file_read, file_readline, file_tell=None):
    223         self.file_read = file_read
    224         self.file_readline = file_readline
    225         self.current_frame = None
    226 
    227     def read(self, n):
    228         if self.current_frame:
    229             data = self.current_frame.read(n)
    230             if not data and n != 0:
    231                 self.current_frame = None
    232                 return self.file_read(n)
    233             if len(data) < n:
    234                 raise UnpicklingError(
    235                     "pickle exhausted before end of frame")
    236             return data
    237         else:
    238             return self.file_read(n)
    239 
    240     def readline(self):
    241         if self.current_frame:
    242             data = self.current_frame.readline()
    243             if not data:
    244                 self.current_frame = None
    245                 return self.file_readline()
    246             if data[-1] != b'\n'[0]:
    247                 raise UnpicklingError(
    248                     "pickle exhausted before end of frame")
    249             return data
    250         else:
    251             return self.file_readline()
    252 
    253     def load_frame(self, frame_size):
    254         if self.current_frame and self.current_frame.read() != b'':
    255             raise UnpicklingError(
    256                 "beginning of a new frame before end of current frame")
    257         self.current_frame = io.BytesIO(self.file_read(frame_size))
    258 
    259 
    260 # Tools used for pickling.
    261 
    262 def _getattribute(obj, name):
    263     for subpath in name.split('.'):
    264         if subpath == '<locals>':
    265             raise AttributeError("Can't get local attribute {!r} on {!r}"
    266                                  .format(name, obj))
    267         try:
    268             parent = obj
    269             obj = getattr(obj, subpath)
    270         except AttributeError:
    271             raise AttributeError("Can't get attribute {!r} on {!r}"
    272                                  .format(name, obj))
    273     return obj, parent
    274 
    275 def whichmodule(obj, name):
    276     """Find the module an object belong to."""
    277     module_name = getattr(obj, '__module__', None)
    278     if module_name is not None:
    279         return module_name
    280     # Protect the iteration by using a list copy of sys.modules against dynamic
    281     # modules that trigger imports of other modules upon calls to getattr.
    282     for module_name, module in list(sys.modules.items()):
    283         if module_name == '__main__' or module is None:
    284             continue
    285         try:
    286             if _getattribute(module, name)[0] is obj:
    287                 return module_name
    288         except AttributeError:
    289             pass
    290     return '__main__'
    291 
    292 def encode_long(x):
    293     r"""Encode a long to a two's complement little-endian binary string.
    294     Note that 0 is a special case, returning an empty string, to save a
    295     byte in the LONG1 pickling context.
    296 
    297     >>> encode_long(0)
    298     b''
    299     >>> encode_long(255)
    300     b'\xff\x00'
    301     >>> encode_long(32767)
    302     b'\xff\x7f'
    303     >>> encode_long(-256)
    304     b'\x00\xff'
    305     >>> encode_long(-32768)
    306     b'\x00\x80'
    307     >>> encode_long(-128)
    308     b'\x80'
    309     >>> encode_long(127)
    310     b'\x7f'
    311     >>>
    312     """
    313     if x == 0:
    314         return b''
    315     nbytes = (x.bit_length() >> 3) + 1
    316     result = x.to_bytes(nbytes, byteorder='little', signed=True)
    317     if x < 0 and nbytes > 1:
    318         if result[-1] == 0xff and (result[-2] & 0x80) != 0:
    319             result = result[:-1]
    320     return result
    321 
    322 def decode_long(data):
    323     r"""Decode a long from a two's complement little-endian binary string.
    324 
    325     >>> decode_long(b'')
    326     0
    327     >>> decode_long(b"\xff\x00")
    328     255
    329     >>> decode_long(b"\xff\x7f")
    330     32767
    331     >>> decode_long(b"\x00\xff")
    332     -256
    333     >>> decode_long(b"\x00\x80")
    334     -32768
    335     >>> decode_long(b"\x80")
    336     -128
    337     >>> decode_long(b"\x7f")
    338     127
    339     """
    340     return int.from_bytes(data, byteorder='little', signed=True)
    341 
    342 
    343 # Pickling machinery
    344 
    345 class _Pickler:
    346 
    347     def __init__(self, file, protocol=None, *, fix_imports=True):
    348         """This takes a binary file for writing a pickle data stream.
    349 
    350         The optional *protocol* argument tells the pickler to use the
    351         given protocol; supported protocols are 0, 1, 2, 3 and 4.  The
    352         default protocol is 3; a backward-incompatible protocol designed
    353         for Python 3.
    354 
    355         Specifying a negative protocol version selects the highest
    356         protocol version supported.  The higher the protocol used, the
    357         more recent the version of Python needed to read the pickle
    358         produced.
    359 
    360         The *file* argument must have a write() method that accepts a
    361         single bytes argument. It can thus be a file object opened for
    362         binary writing, an io.BytesIO instance, or any other custom
    363         object that meets this interface.
    364 
    365         If *fix_imports* is True and *protocol* is less than 3, pickle
    366         will try to map the new Python 3 names to the old module names
    367         used in Python 2, so that the pickle data stream is readable
    368         with Python 2.
    369         """
    370         if protocol is None:
    371             protocol = DEFAULT_PROTOCOL
    372         if protocol < 0:
    373             protocol = HIGHEST_PROTOCOL
    374         elif not 0 <= protocol <= HIGHEST_PROTOCOL:
    375             raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
    376         try:
    377             self._file_write = file.write
    378         except AttributeError:
    379             raise TypeError("file must have a 'write' attribute")
    380         self.framer = _Framer(self._file_write)
    381         self.write = self.framer.write
    382         self.memo = {}
    383         self.proto = int(protocol)
    384         self.bin = protocol >= 1
    385         self.fast = 0
    386         self.fix_imports = fix_imports and protocol < 3
    387 
    388     def clear_memo(self):
    389         """Clears the pickler's "memo".
    390 
    391         The memo is the data structure that remembers which objects the
    392         pickler has already seen, so that shared or recursive objects
    393         are pickled by reference and not by value.  This method is
    394         useful when re-using picklers.
    395         """
    396         self.memo.clear()
    397 
    398     def dump(self, obj):
    399         """Write a pickled representation of obj to the open file."""
    400         # Check whether Pickler was initialized correctly. This is
    401         # only needed to mimic the behavior of _pickle.Pickler.dump().
    402         if not hasattr(self, "_file_write"):
    403             raise PicklingError("Pickler.__init__() was not called by "
    404                                 "%s.__init__()" % (self.__class__.__name__,))
    405         if self.proto >= 2:
    406             self.write(PROTO + pack("<B", self.proto))
    407         if self.proto >= 4:
    408             self.framer.start_framing()
    409         self.save(obj)
    410         self.write(STOP)
    411         self.framer.end_framing()
    412 
    413     def memoize(self, obj):
    414         """Store an object in the memo."""
    415 
    416         # The Pickler memo is a dictionary mapping object ids to 2-tuples
    417         # that contain the Unpickler memo key and the object being memoized.
    418         # The memo key is written to the pickle and will become
    419         # the key in the Unpickler's memo.  The object is stored in the
    420         # Pickler memo so that transient objects are kept alive during
    421         # pickling.
    422 
    423         # The use of the Unpickler memo length as the memo key is just a
    424         # convention.  The only requirement is that the memo values be unique.
    425         # But there appears no advantage to any other scheme, and this
    426         # scheme allows the Unpickler memo to be implemented as a plain (but
    427         # growable) array, indexed by memo key.
    428         if self.fast:
    429             return
    430         assert id(obj) not in self.memo
    431         idx = len(self.memo)
    432         self.write(self.put(idx))
    433         self.memo[id(obj)] = idx, obj
    434 
    435     # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    436     def put(self, idx):
    437         if self.proto >= 4:
    438             return MEMOIZE
    439         elif self.bin:
    440             if idx < 256:
    441                 return BINPUT + pack("<B", idx)
    442             else:
    443                 return LONG_BINPUT + pack("<I", idx)
    444         else:
    445             return PUT + repr(idx).encode("ascii") + b'\n'
    446 
    447     # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    448     def get(self, i):
    449         if self.bin:
    450             if i < 256:
    451                 return BINGET + pack("<B", i)
    452             else:
    453                 return LONG_BINGET + pack("<I", i)
    454 
    455         return GET + repr(i).encode("ascii") + b'\n'
    456 
    457     def save(self, obj, save_persistent_id=True):
    458         self.framer.commit_frame()
    459 
    460         # Check for persistent id (defined by a subclass)
    461         pid = self.persistent_id(obj)
    462         if pid is not None and save_persistent_id:
    463             self.save_pers(pid)
    464             return
    465 
    466         # Check the memo
    467         x = self.memo.get(id(obj))
    468         if x is not None:
    469             self.write(self.get(x[0]))
    470             return
    471 
    472         # Check the type dispatch table
    473         t = type(obj)
    474         f = self.dispatch.get(t)
    475         if f is not None:
    476             f(self, obj) # Call unbound method with explicit self
    477             return
    478 
    479         # Check private dispatch table if any, or else copyreg.dispatch_table
    480         reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
    481         if reduce is not None:
    482             rv = reduce(obj)
    483         else:
    484             # Check for a class with a custom metaclass; treat as regular class
    485             try:
    486                 issc = issubclass(t, type)
    487             except TypeError: # t is not a class (old Boost; see SF #502085)
    488                 issc = False
    489             if issc:
    490                 self.save_global(obj)
    491                 return
    492 
    493             # Check for a __reduce_ex__ method, fall back to __reduce__
    494             reduce = getattr(obj, "__reduce_ex__", None)
    495             if reduce is not None:
    496                 rv = reduce(self.proto)
    497             else:
    498                 reduce = getattr(obj, "__reduce__", None)
    499                 if reduce is not None:
    500                     rv = reduce()
    501                 else:
    502                     raise PicklingError("Can't pickle %r object: %r" %
    503                                         (t.__name__, obj))
    504 
    505         # Check for string returned by reduce(), meaning "save as global"
    506         if isinstance(rv, str):
    507             self.save_global(obj, rv)
    508             return
    509 
    510         # Assert that reduce() returned a tuple
    511         if not isinstance(rv, tuple):
    512             raise PicklingError("%s must return string or tuple" % reduce)
    513 
    514         # Assert that it returned an appropriately sized tuple
    515         l = len(rv)
    516         if not (2 <= l <= 5):
    517             raise PicklingError("Tuple returned by %s must have "
    518                                 "two to five elements" % reduce)
    519 
    520         # Save the reduce() output and finally memoize the object
    521         self.save_reduce(obj=obj, *rv)
    522 
    523     def persistent_id(self, obj):
    524         # This exists so a subclass can override it
    525         return None
    526 
    527     def save_pers(self, pid):
    528         # Save a persistent id reference
    529         if self.bin:
    530             self.save(pid, save_persistent_id=False)
    531             self.write(BINPERSID)
    532         else:
    533             try:
    534                 self.write(PERSID + str(pid).encode("ascii") + b'\n')
    535             except UnicodeEncodeError:
    536                 raise PicklingError(
    537                     "persistent IDs in protocol 0 must be ASCII strings")
    538 
    539     def save_reduce(self, func, args, state=None, listitems=None,
    540                     dictitems=None, obj=None):
    541         # This API is called by some subclasses
    542 
    543         if not isinstance(args, tuple):
    544             raise PicklingError("args from save_reduce() must be a tuple")
    545         if not callable(func):
    546             raise PicklingError("func from save_reduce() must be callable")
    547 
    548         save = self.save
    549         write = self.write
    550 
    551         func_name = getattr(func, "__name__", "")
    552         if self.proto >= 2 and func_name == "__newobj_ex__":
    553             cls, args, kwargs = args
    554             if not hasattr(cls, "__new__"):
    555                 raise PicklingError("args[0] from {} args has no __new__"
    556                                     .format(func_name))
    557             if obj is not None and cls is not obj.__class__:
    558                 raise PicklingError("args[0] from {} args has the wrong class"
    559                                     .format(func_name))
    560             if self.proto >= 4:
    561                 save(cls)
    562                 save(args)
    563                 save(kwargs)
    564                 write(NEWOBJ_EX)
    565             else:
    566                 func = partial(cls.__new__, cls, *args, **kwargs)
    567                 save(func)
    568                 save(())
    569                 write(REDUCE)
    570         elif self.proto >= 2 and func_name == "__newobj__":
    571             # A __reduce__ implementation can direct protocol 2 or newer to
    572             # use the more efficient NEWOBJ opcode, while still
    573             # allowing protocol 0 and 1 to work normally.  For this to
    574             # work, the function returned by __reduce__ should be
    575             # called __newobj__, and its first argument should be a
    576             # class.  The implementation for __newobj__
    577             # should be as follows, although pickle has no way to
    578             # verify this:
    579             #
    580             # def __newobj__(cls, *args):
    581             #     return cls.__new__(cls, *args)
    582             #
    583             # Protocols 0 and 1 will pickle a reference to __newobj__,
    584             # while protocol 2 (and above) will pickle a reference to
    585             # cls, the remaining args tuple, and the NEWOBJ code,
    586             # which calls cls.__new__(cls, *args) at unpickling time
    587             # (see load_newobj below).  If __reduce__ returns a
    588             # three-tuple, the state from the third tuple item will be
    589             # pickled regardless of the protocol, calling __setstate__
    590             # at unpickling time (see load_build below).
    591             #
    592             # Note that no standard __newobj__ implementation exists;
    593             # you have to provide your own.  This is to enforce
    594             # compatibility with Python 2.2 (pickles written using
    595             # protocol 0 or 1 in Python 2.3 should be unpicklable by
    596             # Python 2.2).
    597             cls = args[0]
    598             if not hasattr(cls, "__new__"):
    599                 raise PicklingError(
    600                     "args[0] from __newobj__ args has no __new__")
    601             if obj is not None and cls is not obj.__class__:
    602                 raise PicklingError(
    603                     "args[0] from __newobj__ args has the wrong class")
    604             args = args[1:]
    605             save(cls)
    606             save(args)
    607             write(NEWOBJ)
    608         else:
    609             save(func)
    610             save(args)
    611             write(REDUCE)
    612 
    613         if obj is not None:
    614             # If the object is already in the memo, this means it is
    615             # recursive. In this case, throw away everything we put on the
    616             # stack, and fetch the object back from the memo.
    617             if id(obj) in self.memo:
    618                 write(POP + self.get(self.memo[id(obj)][0]))
    619             else:
    620                 self.memoize(obj)
    621 
    622         # More new special cases (that work with older protocols as
    623         # well): when __reduce__ returns a tuple with 4 or 5 items,
    624         # the 4th and 5th item should be iterators that provide list
    625         # items and dict items (as (key, value) tuples), or None.
    626 
    627         if listitems is not None:
    628             self._batch_appends(listitems)
    629 
    630         if dictitems is not None:
    631             self._batch_setitems(dictitems)
    632 
    633         if state is not None:
    634             save(state)
    635             write(BUILD)
    636 
    637     # Methods below this point are dispatched through the dispatch table
    638 
    639     dispatch = {}
    640 
    641     def save_none(self, obj):
    642         self.write(NONE)
    643     dispatch[type(None)] = save_none
    644 
    645     def save_bool(self, obj):
    646         if self.proto >= 2:
    647             self.write(NEWTRUE if obj else NEWFALSE)
    648         else:
    649             self.write(TRUE if obj else FALSE)
    650     dispatch[bool] = save_bool
    651 
    652     def save_long(self, obj):
    653         if self.bin:
    654             # If the int is small enough to fit in a signed 4-byte 2's-comp
    655             # format, we can store it more efficiently than the general
    656             # case.
    657             # First one- and two-byte unsigned ints:
    658             if obj >= 0:
    659                 if obj <= 0xff:
    660                     self.write(BININT1 + pack("<B", obj))
    661                     return
    662                 if obj <= 0xffff:
    663                     self.write(BININT2 + pack("<H", obj))
    664                     return
    665             # Next check for 4-byte signed ints:
    666             if -0x80000000 <= obj <= 0x7fffffff:
    667                 self.write(BININT + pack("<i", obj))
    668                 return
    669         if self.proto >= 2:
    670             encoded = encode_long(obj)
    671             n = len(encoded)
    672             if n < 256:
    673                 self.write(LONG1 + pack("<B", n) + encoded)
    674             else:
    675                 self.write(LONG4 + pack("<i", n) + encoded)
    676             return
    677         self.write(LONG + repr(obj).encode("ascii") + b'L\n')
    678     dispatch[int] = save_long
    679 
    680     def save_float(self, obj):
    681         if self.bin:
    682             self.write(BINFLOAT + pack('>d', obj))
    683         else:
    684             self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
    685     dispatch[float] = save_float
    686 
    687     def save_bytes(self, obj):
    688         if self.proto < 3:
    689             if not obj: # bytes object is empty
    690                 self.save_reduce(bytes, (), obj=obj)
    691             else:
    692                 self.save_reduce(codecs.encode,
    693                                  (str(obj, 'latin1'), 'latin1'), obj=obj)
    694             return
    695         n = len(obj)
    696         if n <= 0xff:
    697             self.write(SHORT_BINBYTES + pack("<B", n) + obj)
    698         elif n > 0xffffffff and self.proto >= 4:
    699             self.write(BINBYTES8 + pack("<Q", n) + obj)
    700         else:
    701             self.write(BINBYTES + pack("<I", n) + obj)
    702         self.memoize(obj)
    703     dispatch[bytes] = save_bytes
    704 
    705     def save_str(self, obj):
    706         if self.bin:
    707             encoded = obj.encode('utf-8', 'surrogatepass')
    708             n = len(encoded)
    709             if n <= 0xff and self.proto >= 4:
    710                 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
    711             elif n > 0xffffffff and self.proto >= 4:
    712                 self.write(BINUNICODE8 + pack("<Q", n) + encoded)
    713             else:
    714                 self.write(BINUNICODE + pack("<I", n) + encoded)
    715         else:
    716             obj = obj.replace("\\", "\\u005c")
    717             obj = obj.replace("\n", "\\u000a")
    718             self.write(UNICODE + obj.encode('raw-unicode-escape') +
    719                        b'\n')
    720         self.memoize(obj)
    721     dispatch[str] = save_str
    722 
    723     def save_tuple(self, obj):
    724         if not obj: # tuple is empty
    725             if self.bin:
    726                 self.write(EMPTY_TUPLE)
    727             else:
    728                 self.write(MARK + TUPLE)
    729             return
    730 
    731         n = len(obj)
    732         save = self.save
    733         memo = self.memo
    734         if n <= 3 and self.proto >= 2:
    735             for element in obj:
    736                 save(element)
    737             # Subtle.  Same as in the big comment below.
    738             if id(obj) in memo:
    739                 get = self.get(memo[id(obj)][0])
    740                 self.write(POP * n + get)
    741             else:
    742                 self.write(_tuplesize2code[n])
    743                 self.memoize(obj)
    744             return
    745 
    746         # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
    747         # has more than 3 elements.
    748         write = self.write
    749         write(MARK)
    750         for element in obj:
    751             save(element)
    752 
    753         if id(obj) in memo:
    754             # Subtle.  d was not in memo when we entered save_tuple(), so
    755             # the process of saving the tuple's elements must have saved
    756             # the tuple itself:  the tuple is recursive.  The proper action
    757             # now is to throw away everything we put on the stack, and
    758             # simply GET the tuple (it's already constructed).  This check
    759             # could have been done in the "for element" loop instead, but
    760             # recursive tuples are a rare thing.
    761             get = self.get(memo[id(obj)][0])
    762             if self.bin:
    763                 write(POP_MARK + get)
    764             else:   # proto 0 -- POP_MARK not available
    765                 write(POP * (n+1) + get)
    766             return
    767 
    768         # No recursion.
    769         write(TUPLE)
    770         self.memoize(obj)
    771 
    772     dispatch[tuple] = save_tuple
    773 
    774     def save_list(self, obj):
    775         if self.bin:
    776             self.write(EMPTY_LIST)
    777         else:   # proto 0 -- can't use EMPTY_LIST
    778             self.write(MARK + LIST)
    779 
    780         self.memoize(obj)
    781         self._batch_appends(obj)
    782 
    783     dispatch[list] = save_list
    784 
    785     _BATCHSIZE = 1000
    786 
    787     def _batch_appends(self, items):
    788         # Helper to batch up APPENDS sequences
    789         save = self.save
    790         write = self.write
    791 
    792         if not self.bin:
    793             for x in items:
    794                 save(x)
    795                 write(APPEND)
    796             return
    797 
    798         it = iter(items)
    799         while True:
    800             tmp = list(islice(it, self._BATCHSIZE))
    801             n = len(tmp)
    802             if n > 1:
    803                 write(MARK)
    804                 for x in tmp:
    805                     save(x)
    806                 write(APPENDS)
    807             elif n:
    808                 save(tmp[0])
    809                 write(APPEND)
    810             # else tmp is empty, and we're done
    811             if n < self._BATCHSIZE:
    812                 return
    813 
    814     def save_dict(self, obj):
    815         if self.bin:
    816             self.write(EMPTY_DICT)
    817         else:   # proto 0 -- can't use EMPTY_DICT
    818             self.write(MARK + DICT)
    819 
    820         self.memoize(obj)
    821         self._batch_setitems(obj.items())
    822 
    823     dispatch[dict] = save_dict
    824     if PyStringMap is not None:
    825         dispatch[PyStringMap] = save_dict
    826 
    827     def _batch_setitems(self, items):
    828         # Helper to batch up SETITEMS sequences; proto >= 1 only
    829         save = self.save
    830         write = self.write
    831 
    832         if not self.bin:
    833             for k, v in items:
    834                 save(k)
    835                 save(v)
    836                 write(SETITEM)
    837             return
    838 
    839         it = iter(items)
    840         while True:
    841             tmp = list(islice(it, self._BATCHSIZE))
    842             n = len(tmp)
    843             if n > 1:
    844                 write(MARK)
    845                 for k, v in tmp:
    846                     save(k)
    847                     save(v)
    848                 write(SETITEMS)
    849             elif n:
    850                 k, v = tmp[0]
    851                 save(k)
    852                 save(v)
    853                 write(SETITEM)
    854             # else tmp is empty, and we're done
    855             if n < self._BATCHSIZE:
    856                 return
    857 
    858     def save_set(self, obj):
    859         save = self.save
    860         write = self.write
    861 
    862         if self.proto < 4:
    863             self.save_reduce(set, (list(obj),), obj=obj)
    864             return
    865 
    866         write(EMPTY_SET)
    867         self.memoize(obj)
    868 
    869         it = iter(obj)
    870         while True:
    871             batch = list(islice(it, self._BATCHSIZE))
    872             n = len(batch)
    873             if n > 0:
    874                 write(MARK)
    875                 for item in batch:
    876                     save(item)
    877                 write(ADDITEMS)
    878             if n < self._BATCHSIZE:
    879                 return
    880     dispatch[set] = save_set
    881 
    882     def save_frozenset(self, obj):
    883         save = self.save
    884         write = self.write
    885 
    886         if self.proto < 4:
    887             self.save_reduce(frozenset, (list(obj),), obj=obj)
    888             return
    889 
    890         write(MARK)
    891         for item in obj:
    892             save(item)
    893 
    894         if id(obj) in self.memo:
    895             # If the object is already in the memo, this means it is
    896             # recursive. In this case, throw away everything we put on the
    897             # stack, and fetch the object back from the memo.
    898             write(POP_MARK + self.get(self.memo[id(obj)][0]))
    899             return
    900 
    901         write(FROZENSET)
    902         self.memoize(obj)
    903     dispatch[frozenset] = save_frozenset
    904 
    905     def save_global(self, obj, name=None):
    906         write = self.write
    907         memo = self.memo
    908 
    909         if name is None:
    910             name = getattr(obj, '__qualname__', None)
    911         if name is None:
    912             name = obj.__name__
    913 
    914         module_name = whichmodule(obj, name)
    915         try:
    916             __import__(module_name, level=0)
    917             module = sys.modules[module_name]
    918             obj2, parent = _getattribute(module, name)
    919         except (ImportError, KeyError, AttributeError):
    920             raise PicklingError(
    921                 "Can't pickle %r: it's not found as %s.%s" %
    922                 (obj, module_name, name))
    923         else:
    924             if obj2 is not obj:
    925                 raise PicklingError(
    926                     "Can't pickle %r: it's not the same object as %s.%s" %
    927                     (obj, module_name, name))
    928 
    929         if self.proto >= 2:
    930             code = _extension_registry.get((module_name, name))
    931             if code:
    932                 assert code > 0
    933                 if code <= 0xff:
    934                     write(EXT1 + pack("<B", code))
    935                 elif code <= 0xffff:
    936                     write(EXT2 + pack("<H", code))
    937                 else:
    938                     write(EXT4 + pack("<i", code))
    939                 return
    940         lastname = name.rpartition('.')[2]
    941         if parent is module:
    942             name = lastname
    943         # Non-ASCII identifiers are supported only with protocols >= 3.
    944         if self.proto >= 4:
    945             self.save(module_name)
    946             self.save(name)
    947             write(STACK_GLOBAL)
    948         elif parent is not module:
    949             self.save_reduce(getattr, (parent, lastname))
    950         elif self.proto >= 3:
    951             write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
    952                   bytes(name, "utf-8") + b'\n')
    953         else:
    954             if self.fix_imports:
    955                 r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
    956                 r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
    957                 if (module_name, name) in r_name_mapping:
    958                     module_name, name = r_name_mapping[(module_name, name)]
    959                 elif module_name in r_import_mapping:
    960                     module_name = r_import_mapping[module_name]
    961             try:
    962                 write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
    963                       bytes(name, "ascii") + b'\n')
    964             except UnicodeEncodeError:
    965                 raise PicklingError(
    966                     "can't pickle global identifier '%s.%s' using "
    967                     "pickle protocol %i" % (module, name, self.proto))
    968 
    969         self.memoize(obj)
    970 
    971     def save_type(self, obj):
    972         if obj is type(None):
    973             return self.save_reduce(type, (None,), obj=obj)
    974         elif obj is type(NotImplemented):
    975             return self.save_reduce(type, (NotImplemented,), obj=obj)
    976         elif obj is type(...):
    977             return self.save_reduce(type, (...,), obj=obj)
    978         return self.save_global(obj)
    979 
    980     dispatch[FunctionType] = save_global
    981     dispatch[type] = save_type
    982 
    983 
    984 # Unpickling machinery
    985 
    986 class _Unpickler:
    987 
    988     def __init__(self, file, *, fix_imports=True,
    989                  encoding="ASCII", errors="strict"):
    990         """This takes a binary file for reading a pickle data stream.
    991 
    992         The protocol version of the pickle is detected automatically, so
    993         no proto argument is needed.
    994 
    995         The argument *file* must have two methods, a read() method that
    996         takes an integer argument, and a readline() method that requires
    997         no arguments.  Both methods should return bytes.  Thus *file*
    998         can be a binary file object opened for reading, an io.BytesIO
    999         object, or any other custom object that meets this interface.
   1000 
   1001         The file-like object must have two methods, a read() method
   1002         that takes an integer argument, and a readline() method that
   1003         requires no arguments.  Both methods should return bytes.
   1004         Thus file-like object can be a binary file object opened for
   1005         reading, a BytesIO object, or any other custom object that
   1006         meets this interface.
   1007 
   1008         Optional keyword arguments are *fix_imports*, *encoding* and
   1009         *errors*, which are used to control compatibility support for
   1010         pickle stream generated by Python 2.  If *fix_imports* is True,
   1011         pickle will try to map the old Python 2 names to the new names
   1012         used in Python 3.  The *encoding* and *errors* tell pickle how
   1013         to decode 8-bit string instances pickled by Python 2; these
   1014         default to 'ASCII' and 'strict', respectively. *encoding* can be
   1015         'bytes' to read theses 8-bit string instances as bytes objects.
   1016         """
   1017         self._file_readline = file.readline
   1018         self._file_read = file.read
   1019         self.memo = {}
   1020         self.encoding = encoding
   1021         self.errors = errors
   1022         self.proto = 0
   1023         self.fix_imports = fix_imports
   1024 
   1025     def load(self):
   1026         """Read a pickled object representation from the open file.
   1027 
   1028         Return the reconstituted object hierarchy specified in the file.
   1029         """
   1030         # Check whether Unpickler was initialized correctly. This is
   1031         # only needed to mimic the behavior of _pickle.Unpickler.dump().
   1032         if not hasattr(self, "_file_read"):
   1033             raise UnpicklingError("Unpickler.__init__() was not called by "
   1034                                   "%s.__init__()" % (self.__class__.__name__,))
   1035         self._unframer = _Unframer(self._file_read, self._file_readline)
   1036         self.read = self._unframer.read
   1037         self.readline = self._unframer.readline
   1038         self.metastack = []
   1039         self.stack = []
   1040         self.append = self.stack.append
   1041         self.proto = 0
   1042         read = self.read
   1043         dispatch = self.dispatch
   1044         try:
   1045             while True:
   1046                 key = read(1)
   1047                 if not key:
   1048                     raise EOFError
   1049                 assert isinstance(key, bytes_types)
   1050                 dispatch[key[0]](self)
   1051         except _Stop as stopinst:
   1052             return stopinst.value
   1053 
   1054     # Return a list of items pushed in the stack after last MARK instruction.
   1055     def pop_mark(self):
   1056         items = self.stack
   1057         self.stack = self.metastack.pop()
   1058         self.append = self.stack.append
   1059         return items
   1060 
   1061     def persistent_load(self, pid):
   1062         raise UnpicklingError("unsupported persistent id encountered")
   1063 
   1064     dispatch = {}
   1065 
   1066     def load_proto(self):
   1067         proto = self.read(1)[0]
   1068         if not 0 <= proto <= HIGHEST_PROTOCOL:
   1069             raise ValueError("unsupported pickle protocol: %d" % proto)
   1070         self.proto = proto
   1071     dispatch[PROTO[0]] = load_proto
   1072 
   1073     def load_frame(self):
   1074         frame_size, = unpack('<Q', self.read(8))
   1075         if frame_size > sys.maxsize:
   1076             raise ValueError("frame size > sys.maxsize: %d" % frame_size)
   1077         self._unframer.load_frame(frame_size)
   1078     dispatch[FRAME[0]] = load_frame
   1079 
   1080     def load_persid(self):
   1081         try:
   1082             pid = self.readline()[:-1].decode("ascii")
   1083         except UnicodeDecodeError:
   1084             raise UnpicklingError(
   1085                 "persistent IDs in protocol 0 must be ASCII strings")
   1086         self.append(self.persistent_load(pid))
   1087     dispatch[PERSID[0]] = load_persid
   1088 
   1089     def load_binpersid(self):
   1090         pid = self.stack.pop()
   1091         self.append(self.persistent_load(pid))
   1092     dispatch[BINPERSID[0]] = load_binpersid
   1093 
   1094     def load_none(self):
   1095         self.append(None)
   1096     dispatch[NONE[0]] = load_none
   1097 
   1098     def load_false(self):
   1099         self.append(False)
   1100     dispatch[NEWFALSE[0]] = load_false
   1101 
   1102     def load_true(self):
   1103         self.append(True)
   1104     dispatch[NEWTRUE[0]] = load_true
   1105 
   1106     def load_int(self):
   1107         data = self.readline()
   1108         if data == FALSE[1:]:
   1109             val = False
   1110         elif data == TRUE[1:]:
   1111             val = True
   1112         else:
   1113             val = int(data, 0)
   1114         self.append(val)
   1115     dispatch[INT[0]] = load_int
   1116 
   1117     def load_binint(self):
   1118         self.append(unpack('<i', self.read(4))[0])
   1119     dispatch[BININT[0]] = load_binint
   1120 
   1121     def load_binint1(self):
   1122         self.append(self.read(1)[0])
   1123     dispatch[BININT1[0]] = load_binint1
   1124 
   1125     def load_binint2(self):
   1126         self.append(unpack('<H', self.read(2))[0])
   1127     dispatch[BININT2[0]] = load_binint2
   1128 
   1129     def load_long(self):
   1130         val = self.readline()[:-1]
   1131         if val and val[-1] == b'L'[0]:
   1132             val = val[:-1]
   1133         self.append(int(val, 0))
   1134     dispatch[LONG[0]] = load_long
   1135 
   1136     def load_long1(self):
   1137         n = self.read(1)[0]
   1138         data = self.read(n)
   1139         self.append(decode_long(data))
   1140     dispatch[LONG1[0]] = load_long1
   1141 
   1142     def load_long4(self):
   1143         n, = unpack('<i', self.read(4))
   1144         if n < 0:
   1145             # Corrupt or hostile pickle -- we never write one like this
   1146             raise UnpicklingError("LONG pickle has negative byte count")
   1147         data = self.read(n)
   1148         self.append(decode_long(data))
   1149     dispatch[LONG4[0]] = load_long4
   1150 
   1151     def load_float(self):
   1152         self.append(float(self.readline()[:-1]))
   1153     dispatch[FLOAT[0]] = load_float
   1154 
   1155     def load_binfloat(self):
   1156         self.append(unpack('>d', self.read(8))[0])
   1157     dispatch[BINFLOAT[0]] = load_binfloat
   1158 
   1159     def _decode_string(self, value):
   1160         # Used to allow strings from Python 2 to be decoded either as
   1161         # bytes or Unicode strings.  This should be used only with the
   1162         # STRING, BINSTRING and SHORT_BINSTRING opcodes.
   1163         if self.encoding == "bytes":
   1164             return value
   1165         else:
   1166             return value.decode(self.encoding, self.errors)
   1167 
   1168     def load_string(self):
   1169         data = self.readline()[:-1]
   1170         # Strip outermost quotes
   1171         if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
   1172             data = data[1:-1]
   1173         else:
   1174             raise UnpicklingError("the STRING opcode argument must be quoted")
   1175         self.append(self._decode_string(codecs.escape_decode(data)[0]))
   1176     dispatch[STRING[0]] = load_string
   1177 
   1178     def load_binstring(self):
   1179         # Deprecated BINSTRING uses signed 32-bit length
   1180         len, = unpack('<i', self.read(4))
   1181         if len < 0:
   1182             raise UnpicklingError("BINSTRING pickle has negative byte count")
   1183         data = self.read(len)
   1184         self.append(self._decode_string(data))
   1185     dispatch[BINSTRING[0]] = load_binstring
   1186 
   1187     def load_binbytes(self):
   1188         len, = unpack('<I', self.read(4))
   1189         if len > maxsize:
   1190             raise UnpicklingError("BINBYTES exceeds system's maximum size "
   1191                                   "of %d bytes" % maxsize)
   1192         self.append(self.read(len))
   1193     dispatch[BINBYTES[0]] = load_binbytes
   1194 
   1195     def load_unicode(self):
   1196         self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
   1197     dispatch[UNICODE[0]] = load_unicode
   1198 
   1199     def load_binunicode(self):
   1200         len, = unpack('<I', self.read(4))
   1201         if len > maxsize:
   1202             raise UnpicklingError("BINUNICODE exceeds system's maximum size "
   1203                                   "of %d bytes" % maxsize)
   1204         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
   1205     dispatch[BINUNICODE[0]] = load_binunicode
   1206 
   1207     def load_binunicode8(self):
   1208         len, = unpack('<Q', self.read(8))
   1209         if len > maxsize:
   1210             raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
   1211                                   "of %d bytes" % maxsize)
   1212         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
   1213     dispatch[BINUNICODE8[0]] = load_binunicode8
   1214 
   1215     def load_binbytes8(self):
   1216         len, = unpack('<Q', self.read(8))
   1217         if len > maxsize:
   1218             raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
   1219                                   "of %d bytes" % maxsize)
   1220         self.append(self.read(len))
   1221     dispatch[BINBYTES8[0]] = load_binbytes8
   1222 
   1223     def load_short_binstring(self):
   1224         len = self.read(1)[0]
   1225         data = self.read(len)
   1226         self.append(self._decode_string(data))
   1227     dispatch[SHORT_BINSTRING[0]] = load_short_binstring
   1228 
   1229     def load_short_binbytes(self):
   1230         len = self.read(1)[0]
   1231         self.append(self.read(len))
   1232     dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
   1233 
   1234     def load_short_binunicode(self):
   1235         len = self.read(1)[0]
   1236         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
   1237     dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
   1238 
   1239     def load_tuple(self):
   1240         items = self.pop_mark()
   1241         self.append(tuple(items))
   1242     dispatch[TUPLE[0]] = load_tuple
   1243 
   1244     def load_empty_tuple(self):
   1245         self.append(())
   1246     dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
   1247 
   1248     def load_tuple1(self):
   1249         self.stack[-1] = (self.stack[-1],)
   1250     dispatch[TUPLE1[0]] = load_tuple1
   1251 
   1252     def load_tuple2(self):
   1253         self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
   1254     dispatch[TUPLE2[0]] = load_tuple2
   1255 
   1256     def load_tuple3(self):
   1257         self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
   1258     dispatch[TUPLE3[0]] = load_tuple3
   1259 
   1260     def load_empty_list(self):
   1261         self.append([])
   1262     dispatch[EMPTY_LIST[0]] = load_empty_list
   1263 
   1264     def load_empty_dictionary(self):
   1265         self.append({})
   1266     dispatch[EMPTY_DICT[0]] = load_empty_dictionary
   1267 
   1268     def load_empty_set(self):
   1269         self.append(set())
   1270     dispatch[EMPTY_SET[0]] = load_empty_set
   1271 
   1272     def load_frozenset(self):
   1273         items = self.pop_mark()
   1274         self.append(frozenset(items))
   1275     dispatch[FROZENSET[0]] = load_frozenset
   1276 
   1277     def load_list(self):
   1278         items = self.pop_mark()
   1279         self.append(items)
   1280     dispatch[LIST[0]] = load_list
   1281 
   1282     def load_dict(self):
   1283         items = self.pop_mark()
   1284         d = {items[i]: items[i+1]
   1285              for i in range(0, len(items), 2)}
   1286         self.append(d)
   1287     dispatch[DICT[0]] = load_dict
   1288 
   1289     # INST and OBJ differ only in how they get a class object.  It's not
   1290     # only sensible to do the rest in a common routine, the two routines
   1291     # previously diverged and grew different bugs.
   1292     # klass is the class to instantiate, and k points to the topmost mark
   1293     # object, following which are the arguments for klass.__init__.
   1294     def _instantiate(self, klass, args):
   1295         if (args or not isinstance(klass, type) or
   1296             hasattr(klass, "__getinitargs__")):
   1297             try:
   1298                 value = klass(*args)
   1299             except TypeError as err:
   1300                 raise TypeError("in constructor for %s: %s" %
   1301                                 (klass.__name__, str(err)), sys.exc_info()[2])
   1302         else:
   1303             value = klass.__new__(klass)
   1304         self.append(value)
   1305 
   1306     def load_inst(self):
   1307         module = self.readline()[:-1].decode("ascii")
   1308         name = self.readline()[:-1].decode("ascii")
   1309         klass = self.find_class(module, name)
   1310         self._instantiate(klass, self.pop_mark())
   1311     dispatch[INST[0]] = load_inst
   1312 
   1313     def load_obj(self):
   1314         # Stack is ... markobject classobject arg1 arg2 ...
   1315         args = self.pop_mark()
   1316         cls = args.pop(0)
   1317         self._instantiate(cls, args)
   1318     dispatch[OBJ[0]] = load_obj
   1319 
   1320     def load_newobj(self):
   1321         args = self.stack.pop()
   1322         cls = self.stack.pop()
   1323         obj = cls.__new__(cls, *args)
   1324         self.append(obj)
   1325     dispatch[NEWOBJ[0]] = load_newobj
   1326 
   1327     def load_newobj_ex(self):
   1328         kwargs = self.stack.pop()
   1329         args = self.stack.pop()
   1330         cls = self.stack.pop()
   1331         obj = cls.__new__(cls, *args, **kwargs)
   1332         self.append(obj)
   1333     dispatch[NEWOBJ_EX[0]] = load_newobj_ex
   1334 
   1335     def load_global(self):
   1336         module = self.readline()[:-1].decode("utf-8")
   1337         name = self.readline()[:-1].decode("utf-8")
   1338         klass = self.find_class(module, name)
   1339         self.append(klass)
   1340     dispatch[GLOBAL[0]] = load_global
   1341 
   1342     def load_stack_global(self):
   1343         name = self.stack.pop()
   1344         module = self.stack.pop()
   1345         if type(name) is not str or type(module) is not str:
   1346             raise UnpicklingError("STACK_GLOBAL requires str")
   1347         self.append(self.find_class(module, name))
   1348     dispatch[STACK_GLOBAL[0]] = load_stack_global
   1349 
   1350     def load_ext1(self):
   1351         code = self.read(1)[0]
   1352         self.get_extension(code)
   1353     dispatch[EXT1[0]] = load_ext1
   1354 
   1355     def load_ext2(self):
   1356         code, = unpack('<H', self.read(2))
   1357         self.get_extension(code)
   1358     dispatch[EXT2[0]] = load_ext2
   1359 
   1360     def load_ext4(self):
   1361         code, = unpack('<i', self.read(4))
   1362         self.get_extension(code)
   1363     dispatch[EXT4[0]] = load_ext4
   1364 
   1365     def get_extension(self, code):
   1366         nil = []
   1367         obj = _extension_cache.get(code, nil)
   1368         if obj is not nil:
   1369             self.append(obj)
   1370             return
   1371         key = _inverted_registry.get(code)
   1372         if not key:
   1373             if code <= 0: # note that 0 is forbidden
   1374                 # Corrupt or hostile pickle.
   1375                 raise UnpicklingError("EXT specifies code <= 0")
   1376             raise ValueError("unregistered extension code %d" % code)
   1377         obj = self.find_class(*key)
   1378         _extension_cache[code] = obj
   1379         self.append(obj)
   1380 
   1381     def find_class(self, module, name):
   1382         # Subclasses may override this.
   1383         if self.proto < 3 and self.fix_imports:
   1384             if (module, name) in _compat_pickle.NAME_MAPPING:
   1385                 module, name = _compat_pickle.NAME_MAPPING[(module, name)]
   1386             elif module in _compat_pickle.IMPORT_MAPPING:
   1387                 module = _compat_pickle.IMPORT_MAPPING[module]
   1388         __import__(module, level=0)
   1389         if self.proto >= 4:
   1390             return _getattribute(sys.modules[module], name)[0]
   1391         else:
   1392             return getattr(sys.modules[module], name)
   1393 
   1394     def load_reduce(self):
   1395         stack = self.stack
   1396         args = stack.pop()
   1397         func = stack[-1]
   1398         stack[-1] = func(*args)
   1399     dispatch[REDUCE[0]] = load_reduce
   1400 
   1401     def load_pop(self):
   1402         if self.stack:
   1403             del self.stack[-1]
   1404         else:
   1405             self.pop_mark()
   1406     dispatch[POP[0]] = load_pop
   1407 
   1408     def load_pop_mark(self):
   1409         self.pop_mark()
   1410     dispatch[POP_MARK[0]] = load_pop_mark
   1411 
   1412     def load_dup(self):
   1413         self.append(self.stack[-1])
   1414     dispatch[DUP[0]] = load_dup
   1415 
   1416     def load_get(self):
   1417         i = int(self.readline()[:-1])
   1418         self.append(self.memo[i])
   1419     dispatch[GET[0]] = load_get
   1420 
   1421     def load_binget(self):
   1422         i = self.read(1)[0]
   1423         self.append(self.memo[i])
   1424     dispatch[BINGET[0]] = load_binget
   1425 
   1426     def load_long_binget(self):
   1427         i, = unpack('<I', self.read(4))
   1428         self.append(self.memo[i])
   1429     dispatch[LONG_BINGET[0]] = load_long_binget
   1430 
   1431     def load_put(self):
   1432         i = int(self.readline()[:-1])
   1433         if i < 0:
   1434             raise ValueError("negative PUT argument")
   1435         self.memo[i] = self.stack[-1]
   1436     dispatch[PUT[0]] = load_put
   1437 
   1438     def load_binput(self):
   1439         i = self.read(1)[0]
   1440         if i < 0:
   1441             raise ValueError("negative BINPUT argument")
   1442         self.memo[i] = self.stack[-1]
   1443     dispatch[BINPUT[0]] = load_binput
   1444 
   1445     def load_long_binput(self):
   1446         i, = unpack('<I', self.read(4))
   1447         if i > maxsize:
   1448             raise ValueError("negative LONG_BINPUT argument")
   1449         self.memo[i] = self.stack[-1]
   1450     dispatch[LONG_BINPUT[0]] = load_long_binput
   1451 
   1452     def load_memoize(self):
   1453         memo = self.memo
   1454         memo[len(memo)] = self.stack[-1]
   1455     dispatch[MEMOIZE[0]] = load_memoize
   1456 
   1457     def load_append(self):
   1458         stack = self.stack
   1459         value = stack.pop()
   1460         list = stack[-1]
   1461         list.append(value)
   1462     dispatch[APPEND[0]] = load_append
   1463 
   1464     def load_appends(self):
   1465         items = self.pop_mark()
   1466         list_obj = self.stack[-1]
   1467         if isinstance(list_obj, list):
   1468             list_obj.extend(items)
   1469         else:
   1470             append = list_obj.append
   1471             for item in items:
   1472                 append(item)
   1473     dispatch[APPENDS[0]] = load_appends
   1474 
   1475     def load_setitem(self):
   1476         stack = self.stack
   1477         value = stack.pop()
   1478         key = stack.pop()
   1479         dict = stack[-1]
   1480         dict[key] = value
   1481     dispatch[SETITEM[0]] = load_setitem
   1482 
   1483     def load_setitems(self):
   1484         items = self.pop_mark()
   1485         dict = self.stack[-1]
   1486         for i in range(0, len(items), 2):
   1487             dict[items[i]] = items[i + 1]
   1488     dispatch[SETITEMS[0]] = load_setitems
   1489 
   1490     def load_additems(self):
   1491         items = self.pop_mark()
   1492         set_obj = self.stack[-1]
   1493         if isinstance(set_obj, set):
   1494             set_obj.update(items)
   1495         else:
   1496             add = set_obj.add
   1497             for item in items:
   1498                 add(item)
   1499     dispatch[ADDITEMS[0]] = load_additems
   1500 
   1501     def load_build(self):
   1502         stack = self.stack
   1503         state = stack.pop()
   1504         inst = stack[-1]
   1505         setstate = getattr(inst, "__setstate__", None)
   1506         if setstate is not None:
   1507             setstate(state)
   1508             return
   1509         slotstate = None
   1510         if isinstance(state, tuple) and len(state) == 2:
   1511             state, slotstate = state
   1512         if state:
   1513             inst_dict = inst.__dict__
   1514             intern = sys.intern
   1515             for k, v in state.items():
   1516                 if type(k) is str:
   1517                     inst_dict[intern(k)] = v
   1518                 else:
   1519                     inst_dict[k] = v
   1520         if slotstate:
   1521             for k, v in slotstate.items():
   1522                 setattr(inst, k, v)
   1523     dispatch[BUILD[0]] = load_build
   1524 
   1525     def load_mark(self):
   1526         self.metastack.append(self.stack)
   1527         self.stack = []
   1528         self.append = self.stack.append
   1529     dispatch[MARK[0]] = load_mark
   1530 
   1531     def load_stop(self):
   1532         value = self.stack.pop()
   1533         raise _Stop(value)
   1534     dispatch[STOP[0]] = load_stop
   1535 
   1536 
   1537 # Shorthands
   1538 
   1539 def _dump(obj, file, protocol=None, *, fix_imports=True):
   1540     _Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
   1541 
   1542 def _dumps(obj, protocol=None, *, fix_imports=True):
   1543     f = io.BytesIO()
   1544     _Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
   1545     res = f.getvalue()
   1546     assert isinstance(res, bytes_types)
   1547     return res
   1548 
   1549 def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
   1550     return _Unpickler(file, fix_imports=fix_imports,
   1551                      encoding=encoding, errors=errors).load()
   1552 
   1553 def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
   1554     if isinstance(s, str):
   1555         raise TypeError("Can't load pickle from unicode string")
   1556     file = io.BytesIO(s)
   1557     return _Unpickler(file, fix_imports=fix_imports,
   1558                       encoding=encoding, errors=errors).load()
   1559 
   1560 # Use the faster _pickle if possible
   1561 try:
   1562     from _pickle import (
   1563         PickleError,
   1564         PicklingError,
   1565         UnpicklingError,
   1566         Pickler,
   1567         Unpickler,
   1568         dump,
   1569         dumps,
   1570         load,
   1571         loads
   1572     )
   1573 except ImportError:
   1574     Pickler, Unpickler = _Pickler, _Unpickler
   1575     dump, dumps, load, loads = _dump, _dumps, _load, _loads
   1576 
   1577 # Doctest
   1578 def _test():
   1579     import doctest
   1580     return doctest.testmod()
   1581 
   1582 if __name__ == "__main__":
   1583     import argparse
   1584     parser = argparse.ArgumentParser(
   1585         description='display contents of the pickle files')
   1586     parser.add_argument(
   1587         'pickle_file', type=argparse.FileType('br'),
   1588         nargs='*', help='the pickle file')
   1589     parser.add_argument(
   1590         '-t', '--test', action='store_true',
   1591         help='run self-test suite')
   1592     parser.add_argument(
   1593         '-v', action='store_true',
   1594         help='run verbosely; only affects self-test run')
   1595     args = parser.parse_args()
   1596     if args.test:
   1597         _test()
   1598     else:
   1599         if not args.pickle_file:
   1600             parser.print_help()
   1601         else:
   1602             import pprint
   1603             for f in args.pickle_file:
   1604                 obj = load(f)
   1605                 pprint.pprint(obj)
   1606