Home | History | Annotate | Download | only in Lib
      1 """Create portable serialized representations of Python objects.
      2 
      3 See module copyreg for a mechanism for registering custom picklers.
      4 See module pickletools source for extensive comments.
      5 
      6 Classes:
      7 
      8     Pickler
      9     Unpickler
     10 
     11 Functions:
     12 
     13     dump(object, file)
     14     dumps(object) -> string
     15     load(file) -> object
     16     loads(string) -> object
     17 
     18 Misc variables:
     19 
     20     __version__
     21     format_version
     22     compatible_formats
     23 
     24 """
     25 
     26 from types import FunctionType
     27 from copyreg import dispatch_table
     28 from copyreg import _extension_registry, _inverted_registry, _extension_cache
     29 from itertools import islice
     30 from functools import partial
     31 import sys
     32 from sys import maxsize
     33 from struct import pack, unpack
     34 import re
     35 import io
     36 import codecs
     37 import _compat_pickle
     38 
# Public API; opcode names are appended to __all__ further below.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 4

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# We intentionally write a protocol that Python 2.x cannot read;
# there are too many issues with that.
DEFAULT_PROTOCOL = 3
     63 
     64 class PickleError(Exception):
     65     """A common base class for the other pickling exceptions."""
     66     pass
     67 
     68 class PicklingError(PickleError):
     69     """This exception is raised when an unpicklable object is passed to the
     70     dump() method.
     71 
     72     """
     73     pass
     74 
     75 class UnpicklingError(PickleError):
     76     """This exception is raised when there is a problem unpickling an object,
     77     such as a security violation.
     78 
     79     Note that other exceptions may also be raised during unpickling, including
     80     (but not necessarily limited to) AttributeError, EOFError, ImportError,
     81     and IndexError.
     82 
     83     """
     84     pass
     85 
     86 # An instance of _Stop is raised by Unpickler.load_stop() in response to
     87 # the STOP opcode, passing the object that is the result of unpickling.
     88 class _Stop(Exception):
     89     def __init__(self, value):
     90         self.value = value
     91 
# Jython has PyStringMap; it's a dict subclass with string keys
# (not available on CPython, where the name is left as None).
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
     97 
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Maps a tuple length (0..3) to the opcode that builds such a tuple.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4
SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Re-export the all-caps opcode names alongside the public API.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
    182 
    183 
    184 class _Framer:
    185 
    186     _FRAME_SIZE_MIN = 4
    187     _FRAME_SIZE_TARGET = 64 * 1024
    188 
    189     def __init__(self, file_write):
    190         self.file_write = file_write
    191         self.current_frame = None
    192 
    193     def start_framing(self):
    194         self.current_frame = io.BytesIO()
    195 
    196     def end_framing(self):
    197         if self.current_frame and self.current_frame.tell() > 0:
    198             self.commit_frame(force=True)
    199             self.current_frame = None
    200 
    201     def commit_frame(self, force=False):
    202         if self.current_frame:
    203             f = self.current_frame
    204             if f.tell() >= self._FRAME_SIZE_TARGET or force:
    205                 data = f.getbuffer()
    206                 write = self.file_write
    207                 if len(data) >= self._FRAME_SIZE_MIN:
    208                     # Issue a single call to the write method of the underlying
    209                     # file object for the frame opcode with the size of the
    210                     # frame. The concatenation is expected to be less expensive
    211                     # than issuing an additional call to write.
    212                     write(FRAME + pack("<Q", len(data)))
    213 
    214                 # Issue a separate call to write to append the frame
    215                 # contents without concatenation to the above to avoid a
    216                 # memory copy.
    217                 write(data)
    218 
    219                 # Start the new frame with a new io.BytesIO instance so that
    220                 # the file object can have delayed access to the previous frame
    221                 # contents via an unreleased memoryview of the previous
    222                 # io.BytesIO instance.
    223                 self.current_frame = io.BytesIO()
    224 
    225     def write(self, data):
    226         if self.current_frame:
    227             return self.current_frame.write(data)
    228         else:
    229             return self.file_write(data)
    230 
    231     def write_large_bytes(self, header, payload):
    232         write = self.file_write
    233         if self.current_frame:
    234             # Terminate the current frame and flush it to the file.
    235             self.commit_frame(force=True)
    236 
    237         # Perform direct write of the header and payload of the large binary
    238         # object. Be careful not to concatenate the header and the payload
    239         # prior to calling 'write' as we do not want to allocate a large
    240         # temporary bytes object.
    241         # We intentionally do not insert a protocol 4 frame opcode to make
    242         # it possible to optimize file.read calls in the loader.
    243         write(header)
    244         write(payload)
    245 
    246 
    247 class _Unframer:
    248 
    249     def __init__(self, file_read, file_readline, file_tell=None):
    250         self.file_read = file_read
    251         self.file_readline = file_readline
    252         self.current_frame = None
    253 
    254     def read(self, n):
    255         if self.current_frame:
    256             data = self.current_frame.read(n)
    257             if not data and n != 0:
    258                 self.current_frame = None
    259                 return self.file_read(n)
    260             if len(data) < n:
    261                 raise UnpicklingError(
    262                     "pickle exhausted before end of frame")
    263             return data
    264         else:
    265             return self.file_read(n)
    266 
    267     def readline(self):
    268         if self.current_frame:
    269             data = self.current_frame.readline()
    270             if not data:
    271                 self.current_frame = None
    272                 return self.file_readline()
    273             if data[-1] != b'\n'[0]:
    274                 raise UnpicklingError(
    275                     "pickle exhausted before end of frame")
    276             return data
    277         else:
    278             return self.file_readline()
    279 
    280     def load_frame(self, frame_size):
    281         if self.current_frame and self.current_frame.read() != b'':
    282             raise UnpicklingError(
    283                 "beginning of a new frame before end of current frame")
    284         self.current_frame = io.BytesIO(self.file_read(frame_size))
    285 
    286 
    287 # Tools used for pickling.
    288 
    289 def _getattribute(obj, name):
    290     for subpath in name.split('.'):
    291         if subpath == '<locals>':
    292             raise AttributeError("Can't get local attribute {!r} on {!r}"
    293                                  .format(name, obj))
    294         try:
    295             parent = obj
    296             obj = getattr(obj, subpath)
    297         except AttributeError:
    298             raise AttributeError("Can't get attribute {!r} on {!r}"
    299                                  .format(name, obj)) from None
    300     return obj, parent
    301 
    302 def whichmodule(obj, name):
    303     """Find the module an object belong to."""
    304     module_name = getattr(obj, '__module__', None)
    305     if module_name is not None:
    306         return module_name
    307     # Protect the iteration by using a list copy of sys.modules against dynamic
    308     # modules that trigger imports of other modules upon calls to getattr.
    309     for module_name, module in list(sys.modules.items()):
    310         if module_name == '__main__' or module is None:
    311             continue
    312         try:
    313             if _getattribute(module, name)[0] is obj:
    314                 return module_name
    315         except AttributeError:
    316             pass
    317     return '__main__'
    318 
    319 def encode_long(x):
    320     r"""Encode a long to a two's complement little-endian binary string.
    321     Note that 0 is a special case, returning an empty string, to save a
    322     byte in the LONG1 pickling context.
    323 
    324     >>> encode_long(0)
    325     b''
    326     >>> encode_long(255)
    327     b'\xff\x00'
    328     >>> encode_long(32767)
    329     b'\xff\x7f'
    330     >>> encode_long(-256)
    331     b'\x00\xff'
    332     >>> encode_long(-32768)
    333     b'\x00\x80'
    334     >>> encode_long(-128)
    335     b'\x80'
    336     >>> encode_long(127)
    337     b'\x7f'
    338     >>>
    339     """
    340     if x == 0:
    341         return b''
    342     nbytes = (x.bit_length() >> 3) + 1
    343     result = x.to_bytes(nbytes, byteorder='little', signed=True)
    344     if x < 0 and nbytes > 1:
    345         if result[-1] == 0xff and (result[-2] & 0x80) != 0:
    346             result = result[:-1]
    347     return result
    348 
    349 def decode_long(data):
    350     r"""Decode a long from a two's complement little-endian binary string.
    351 
    352     >>> decode_long(b'')
    353     0
    354     >>> decode_long(b"\xff\x00")
    355     255
    356     >>> decode_long(b"\xff\x7f")
    357     32767
    358     >>> decode_long(b"\x00\xff")
    359     -256
    360     >>> decode_long(b"\x00\x80")
    361     -32768
    362     >>> decode_long(b"\x80")
    363     -128
    364     >>> decode_long(b"\x7f")
    365     127
    366     """
    367     return int.from_bytes(data, byteorder='little', signed=True)
    368 
    369 
    370 # Pickling machinery
    371 
    372 class _Pickler:
    373 
    def __init__(self, file, protocol=None, *, fix_imports=True):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3 and 4.  The
        default protocol is 3; a backward-incompatible protocol designed
        for Python 3.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument. It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            # Any negative value means "highest protocol available".
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        # All output is routed through the framer so that protocol 4
        # framing can be inserted transparently.
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1        # True for the binary protocols (>= 1)
        self.fast = 0
        self.fix_imports = fix_imports and protocol < 3
    415 
    416     def clear_memo(self):
    417         """Clears the pickler's "memo".
    418 
    419         The memo is the data structure that remembers which objects the
    420         pickler has already seen, so that shared or recursive objects
    421         are pickled by reference and not by value.  This method is
    422         useful when re-using picklers.
    423         """
    424         self.memo.clear()
    425 
    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Mimic _pickle.Pickler.dump(): fail clearly when a subclass
        # overrode __init__ without calling Pickler.__init__().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        proto = self.proto
        if proto >= 2:
            # Protocol 2+ streams start with a PROTO marker.
            self.write(PROTO + pack("<B", proto))
        if proto >= 4:
            # Protocol 4 wraps the body in frames.
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()
    440 
    441     def memoize(self, obj):
    442         """Store an object in the memo."""
    443 
    444         # The Pickler memo is a dictionary mapping object ids to 2-tuples
    445         # that contain the Unpickler memo key and the object being memoized.
    446         # The memo key is written to the pickle and will become
    447         # the key in the Unpickler's memo.  The object is stored in the
    448         # Pickler memo so that transient objects are kept alive during
    449         # pickling.
    450 
    451         # The use of the Unpickler memo length as the memo key is just a
    452         # convention.  The only requirement is that the memo values be unique.
    453         # But there appears no advantage to any other scheme, and this
    454         # scheme allows the Unpickler memo to be implemented as a plain (but
    455         # growable) array, indexed by memo key.
    456         if self.fast:
    457             return
    458         assert id(obj) not in self.memo
    459         idx = len(self.memo)
    460         self.write(self.put(idx))
    461         self.memo[id(obj)] = idx, obj
    462 
    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, idx):
        # Protocol 4 memoizes the stack top implicitly; no index needed.
        if self.proto >= 4:
            return MEMOIZE
        if not self.bin:
            # Protocol 0: textual index argument.
            return PUT + repr(idx).encode("ascii") + b'\n'
        # Binary protocols: smallest opcode whose argument fits the index.
        if idx < 256:
            return BINPUT + pack("<B", idx)
        return LONG_BINPUT + pack("<I", idx)
    474 
    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i):
        if not self.bin:
            # Protocol 0: textual index argument.
            return GET + repr(i).encode("ascii") + b'\n'
        # Binary protocols: 1-byte index when it fits, else 4 bytes.
        if i < 256:
            return BINGET + pack("<B", i)
        return LONG_BINGET + pack("<I", i)
    484 
    def save(self, obj, save_persistent_id=True):
        """Write a pickled representation of obj, dispatching on its type.

        Every object passes through here; the checks below are tried in
        order: persistent id, memo, the built-in type dispatch table,
        then the reduce protocol.
        """
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            # Already pickled: emit a GET reference instead of a copy.
            self.write(self.get(x[0]))
            return

        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f is not None:
            f(self, obj) # Call unbound method with explicit self
            return

        # Check private dispatch table if any, or else copyreg.dispatch_table
        reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
        if reduce is not None:
            rv = reduce(obj)
        else:
            # Check for a class with a custom metaclass; treat as regular class
            try:
                issc = issubclass(t, type)
            except TypeError: # t is not a class (old Boost; see SF #502085)
                issc = False
            if issc:
                self.save_global(obj)
                return

            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce is not None:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce is not None:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)
    550 
    def persistent_id(self, obj):
        """Hook for subclasses: return a persistent id for obj, or None.

        The base implementation assigns no persistent ids.
        """
        return None
    554 
    def save_pers(self, pid):
        # Emit a persistent-id reference for pid.
        if not self.bin:
            # Protocol 0 writes the id inline as an ASCII text line.
            try:
                self.write(PERSID + str(pid).encode("ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "persistent IDs in protocol 0 must be ASCII strings")
            return
        # Binary protocols pickle the id itself, then mark it BINPERSID.
        self.save(pid, save_persistent_id=False)
        self.write(BINPERSID)
    566 
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, obj=None):
        """Pickle the result of a reduce call: (func, args[, state, ...]).

        This API is called by some subclasses.  *obj* is the object
        being reduced; it is used for memoization and sanity checks.
        """
        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Protocols 2/3 have no NEWOBJ_EX; emulate the keyword
                # call with functools.partial plus plain REDUCE.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            # Generic case: pickle func and args, then apply with REDUCE.
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)
    664 
    # Methods below this point are dispatched through the dispatch table

    # Maps each exactly-matching type to the function that serializes it;
    # entries are called with an explicit self (see save() above).
    dispatch = {}

    def save_none(self, obj):
        # None has its own one-byte opcode.
        self.write(NONE)
    dispatch[type(None)] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            # Protocol 2+ has dedicated one-byte opcodes for True/False.
            self.write(NEWTRUE if obj else NEWFALSE)
        else:
            # Older protocols spell the booleans as the INT strings 01/00.
            self.write(TRUE if obj else FALSE)
    dispatch[bool] = save_bool
    679 
    def save_long(self, obj):
        # Write an int using the most compact encoding the protocol allows.
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + pack("<B", obj))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + pack("<H", obj))
                    return
            # Next check for 4-byte signed ints:
            if -0x80000000 <= obj <= 0x7fffffff:
                self.write(BININT + pack("<i", obj))
                return
        if self.proto >= 2:
            # Protocol 2+: length-prefixed little-endian two's complement
            # (see encode_long above).
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + pack("<B", n) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        # Protocol 0/1 text forms; the trailing 'L' matches the Python 2
        # long-literal spelling for out-of-range ints.
        if -0x80000000 <= obj <= 0x7fffffff:
            self.write(INT + repr(obj).encode("ascii") + b'\n')
        else:
            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
    dispatch[int] = save_long
    710 
    def save_float(self, obj):
        """Pickle a float: IEEE-754 big-endian double in binary
        protocols, repr() text under protocol 0."""
        if not self.bin:
            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
        else:
            self.write(BINFLOAT + pack('>d', obj))
    dispatch[float] = save_float
    717 
    def save_bytes(self, obj):
        """Pickle a bytes object.

        Protocols < 3 have no bytes opcodes, so reduce through a latin-1
        round-trip (or plain bytes() for the empty case).  Otherwise
        pick the shortest length-prefixed form, routing large payloads
        around the framer.
        """
        if self.proto < 3:
            # Python 2-compatible fallback: no native bytes support.
            if obj:
                self.save_reduce(codecs.encode,
                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
            else:
                self.save_reduce(bytes, (), obj=obj)
            return
        size = len(obj)
        if size <= 0xff:
            self.write(SHORT_BINBYTES + pack("<B", size) + obj)
        elif size > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINBYTES8 + pack("<Q", size), obj)
        elif size >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINBYTES + pack("<I", size), obj)
        else:
            self.write(BINBYTES + pack("<I", size) + obj)
        self.memoize(obj)
    dispatch[bytes] = save_bytes
    737 
    def save_str(self, obj):
        """Pickle a str.

        Binary protocols write length-prefixed UTF-8 ('surrogatepass'
        keeps lone surrogates round-trippable).  Protocol 0 writes the
        text UNICODE opcode in raw-unicode-escape, escaping backslashes
        and newlines first.

        Bug fix: memoize the *original* string, not the escaped
        temporary -- the old code rebound ``obj`` before memoizing, so
        under protocol 0 repeated strings were never found in the memo
        and were re-pickled in full every time.
        """
        if self.bin:
            encoded = obj.encode('utf-8', 'surrogatepass')
            n = len(encoded)
            if n <= 0xff and self.proto >= 4:
                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
            elif n > 0xffffffff and self.proto >= 4:
                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
            elif n >= self.framer._FRAME_SIZE_TARGET:
                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
            else:
                self.write(BINUNICODE + pack("<I", n) + encoded)
        else:
            # Escape in a temporary so the original obj is memoized.
            tmp = obj.replace("\\", "\\u005c")
            tmp = tmp.replace("\n", "\\u000a")
            self.write(UNICODE + tmp.encode('raw-unicode-escape') +
                       b'\n')
        self.memoize(obj)
    dispatch[str] = save_str
    757 
    def save_tuple(self, obj):
        """Pickle a tuple, handling self-referential tuples via the memo.

        Empty and short (<= 3 items, proto 2+) tuples get dedicated
        opcodes; longer ones use MARK ... TUPLE.  If saving the items
        put the tuple itself into the memo, the tuple is recursive: the
        stacked items are popped and the memoized object is fetched with
        GET instead.
        """
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple
    808 
    def save_list(self, obj):
        """Pickle a list: emit an empty list, memoize it, then append
        the items in batches (this supports recursive lists)."""
        header = EMPTY_LIST if self.bin else MARK + LIST
        self.write(header)
        self.memoize(obj)
        self._batch_appends(obj)

    dispatch[list] = save_list
    819 
    820     _BATCHSIZE = 1000
    821 
    def _batch_appends(self, items):
        """Save a sequence of list items, batching APPENDS for binary
        protocols to limit the number of opcodes emitted."""
        save = self.save
        write = self.write

        if not self.bin:
            # Protocol 0: one APPEND per item, no batching available.
            for item in items:
                save(item)
                write(APPEND)
            return

        it = iter(items)
        while True:
            batch = list(islice(it, self._BATCHSIZE))
            size = len(batch)
            if size > 1:
                write(MARK)
                for item in batch:
                    save(item)
                write(APPENDS)
            elif size:
                save(batch[0])
                write(APPEND)
            # A short (or empty) batch means the iterator is exhausted.
            if size < self._BATCHSIZE:
                return
    848 
    def save_dict(self, obj):
        """Pickle a dict: emit an empty dict, memoize it, then fill it
        with batched SETITEMS (this supports recursive dicts)."""
        self.write(EMPTY_DICT if self.bin else MARK + DICT)
        self.memoize(obj)
        self._batch_setitems(obj.items())

    dispatch[dict] = save_dict
    if PyStringMap is not None:
        dispatch[PyStringMap] = save_dict
    861 
    def _batch_setitems(self, items):
        """Save (key, value) pairs, batching SETITEMS for binary
        protocols; protocol 0 gets one SETITEM per pair."""
        save = self.save
        write = self.write

        if not self.bin:
            for key, value in items:
                save(key)
                save(value)
                write(SETITEM)
            return

        it = iter(items)
        while True:
            batch = list(islice(it, self._BATCHSIZE))
            size = len(batch)
            if size > 1:
                write(MARK)
                for key, value in batch:
                    save(key)
                    save(value)
                write(SETITEMS)
            elif size:
                key, value = batch[0]
                save(key)
                save(value)
                write(SETITEM)
            # A short (or empty) batch means the iterator is exhausted.
            if size < self._BATCHSIZE:
                return
    892 
    def save_set(self, obj):
        """Pickle a set.

        Protocol 4 added EMPTY_SET/ADDITEMS; older protocols reduce the
        set to ``set(list(obj))``.
        """
        if self.proto < 4:
            self.save_reduce(set, (list(obj),), obj=obj)
            return

        save = self.save
        write = self.write
        write(EMPTY_SET)
        self.memoize(obj)

        it = iter(obj)
        while True:
            batch = list(islice(it, self._BATCHSIZE))
            if batch:
                write(MARK)
                for item in batch:
                    save(item)
                write(ADDITEMS)
            if len(batch) < self._BATCHSIZE:
                return
    dispatch[set] = save_set
    916 
    def save_frozenset(self, obj):
        """Pickle a frozenset.

        Protocol 4 added the FROZENSET opcode; older protocols reduce to
        ``frozenset(list(obj))``.  A recursive frozenset is detected
        after its elements are saved and fetched back from the memo.
        """
        if self.proto < 4:
            self.save_reduce(frozenset, (list(obj),), obj=obj)
            return

        write = self.write
        write(MARK)
        for element in obj:
            self.save(element)

        if id(obj) in self.memo:
            # Saving the elements pickled the frozenset itself, so it is
            # recursive: discard the stacked items and GET the object.
            write(POP_MARK + self.get(self.memo[id(obj)][0]))
            return

        write(FROZENSET)
        self.memoize(obj)
    dispatch[frozenset] = save_frozenset
    939 
    def save_global(self, obj, name=None):
        """Pickle *obj* by reference: as a module plus (qualified) name.

        Used for classes and functions.  Verifies that re-importing
        module.name yields the very same object, honors the copyreg
        extension registry, and otherwise emits STACK_GLOBAL (proto 4),
        a getattr-reduction for nested names, or the text GLOBAL opcode.

        Raises PicklingError when the object cannot be found again, is
        not identical to what the lookup returns, or (proto < 3) has a
        non-ASCII identifier.
        """
        write = self.write

        if name is None:
            name = getattr(obj, '__qualname__', None)
        if name is None:
            name = obj.__name__

        module_name = whichmodule(obj, name)
        try:
            __import__(module_name, level=0)
            module = sys.modules[module_name]
            obj2, parent = _getattribute(module, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module_name, name)) from None
        else:
            if obj2 is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module_name, name))

        if self.proto >= 2:
            # Prefer a registered extension code when one exists.
            code = _extension_registry.get((module_name, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + pack("<B", code))
                elif code <= 0xffff:
                    write(EXT2 + pack("<H", code))
                else:
                    write(EXT4 + pack("<i", code))
                return
        lastname = name.rpartition('.')[2]
        if parent is module:
            name = lastname
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 4:
            self.save(module_name)
            self.save(name)
            write(STACK_GLOBAL)
        elif parent is not module:
            # Nested attribute (e.g. a method): pickle as getattr().
            self.save_reduce(getattr, (parent, lastname))
        elif self.proto >= 3:
            write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
                  bytes(name, "utf-8") + b'\n')
        else:
            if self.fix_imports:
                r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
                r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
                if (module_name, name) in r_name_mapping:
                    module_name, name = r_name_mapping[(module_name, name)]
                elif module_name in r_import_mapping:
                    module_name = r_import_mapping[module_name]
            try:
                write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                      bytes(name, "ascii") + b'\n')
            except UnicodeEncodeError:
                # Bug fix: report the module *name*; the old message
                # interpolated the module object's repr.
                raise PicklingError(
                    "can't pickle global identifier '%s.%s' using "
                    "pickle protocol %i" %
                    (module_name, name, self.proto)) from None

        self.memoize(obj)
   1005 
   1006     def save_type(self, obj):
   1007         if obj is type(None):
   1008             return self.save_reduce(type, (None,), obj=obj)
   1009         elif obj is type(NotImplemented):
   1010             return self.save_reduce(type, (NotImplemented,), obj=obj)
   1011         elif obj is type(...):
   1012             return self.save_reduce(type, (...,), obj=obj)
   1013         return self.save_global(obj)
   1014 
   1015     dispatch[FunctionType] = save_global
   1016     dispatch[type] = save_type
   1017 
   1018 
   1019 # Unpickling machinery
   1020 
   1021 class _Unpickler:
   1022 
    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict"):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments.  Both methods should return bytes.  Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        Optional keyword arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2.  If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3.  The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}                     # memo index -> unpickled object
        self.encoding = encoding
        self.errors = errors
        self.proto = 0                     # updated by the PROTO opcode
        self.fix_imports = fix_imports
   1059 
   1060     def load(self):
   1061         """Read a pickled object representation from the open file.
   1062 
   1063         Return the reconstituted object hierarchy specified in the file.
   1064         """
   1065         # Check whether Unpickler was initialized correctly. This is
   1066         # only needed to mimic the behavior of _pickle.Unpickler.dump().
   1067         if not hasattr(self, "_file_read"):
   1068             raise UnpicklingError("Unpickler.__init__() was not called by "
   1069                                   "%s.__init__()" % (self.__class__.__name__,))
   1070         self._unframer = _Unframer(self._file_read, self._file_readline)
   1071         self.read = self._unframer.read
   1072         self.readline = self._unframer.readline
   1073         self.metastack = []
   1074         self.stack = []
   1075         self.append = self.stack.append
   1076         self.proto = 0
   1077         read = self.read
   1078         dispatch = self.dispatch
   1079         try:
   1080             while True:
   1081                 key = read(1)
   1082                 if not key:
   1083                     raise EOFError
   1084                 assert isinstance(key, bytes_types)
   1085                 dispatch[key[0]](self)
   1086         except _Stop as stopinst:
   1087             return stopinst.value
   1088 
   1089     # Return a list of items pushed in the stack after last MARK instruction.
   1090     def pop_mark(self):
   1091         items = self.stack
   1092         self.stack = self.metastack.pop()
   1093         self.append = self.stack.append
   1094         return items
   1095 
    def persistent_load(self, pid):
        """Hook for resolving persistent IDs; the default rejects all of
        them.  Subclasses must override this to support persistent IDs."""
        raise UnpicklingError("unsupported persistent id encountered")
   1098 
   1099     dispatch = {}
   1100 
   1101     def load_proto(self):
   1102         proto = self.read(1)[0]
   1103         if not 0 <= proto <= HIGHEST_PROTOCOL:
   1104             raise ValueError("unsupported pickle protocol: %d" % proto)
   1105         self.proto = proto
   1106     dispatch[PROTO[0]] = load_proto
   1107 
    def load_frame(self):
        """FRAME opcode (protocol 4): hand the declared 8-byte frame
        length to the unframer."""
        frame_size, = unpack('<Q', self.read(8))
        if frame_size > sys.maxsize:
            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
        self._unframer.load_frame(frame_size)
    dispatch[FRAME[0]] = load_frame
   1114 
    def load_persid(self):
        """PERSID opcode: read a newline-terminated ASCII persistent ID
        and push whatever persistent_load() resolves it to."""
        try:
            pid = self.readline()[:-1].decode("ascii")
        except UnicodeDecodeError:
            raise UnpicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid
   1123 
   1124     def load_binpersid(self):
   1125         pid = self.stack.pop()
   1126         self.append(self.persistent_load(pid))
   1127     dispatch[BINPERSID[0]] = load_binpersid
   1128 
    def load_none(self):
        """NONE opcode: push None."""
        self.append(None)
    dispatch[NONE[0]] = load_none
   1132 
    def load_false(self):
        """NEWFALSE opcode: push False."""
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false
   1136 
    def load_true(self):
        """NEWTRUE opcode: push True."""
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true
   1140 
   1141     def load_int(self):
   1142         data = self.readline()
   1143         if data == FALSE[1:]:
   1144             val = False
   1145         elif data == TRUE[1:]:
   1146             val = True
   1147         else:
   1148             val = int(data, 0)
   1149         self.append(val)
   1150     dispatch[INT[0]] = load_int
   1151 
   1152     def load_binint(self):
   1153         self.append(unpack('<i', self.read(4))[0])
   1154     dispatch[BININT[0]] = load_binint
   1155 
   1156     def load_binint1(self):
   1157         self.append(self.read(1)[0])
   1158     dispatch[BININT1[0]] = load_binint1
   1159 
   1160     def load_binint2(self):
   1161         self.append(unpack('<H', self.read(2))[0])
   1162     dispatch[BININT2[0]] = load_binint2
   1163 
   1164     def load_long(self):
   1165         val = self.readline()[:-1]
   1166         if val and val[-1] == b'L'[0]:
   1167             val = val[:-1]
   1168         self.append(int(val, 0))
   1169     dispatch[LONG[0]] = load_long
   1170 
   1171     def load_long1(self):
   1172         n = self.read(1)[0]
   1173         data = self.read(n)
   1174         self.append(decode_long(data))
   1175     dispatch[LONG1[0]] = load_long1
   1176 
   1177     def load_long4(self):
   1178         n, = unpack('<i', self.read(4))
   1179         if n < 0:
   1180             # Corrupt or hostile pickle -- we never write one like this
   1181             raise UnpicklingError("LONG pickle has negative byte count")
   1182         data = self.read(n)
   1183         self.append(decode_long(data))
   1184     dispatch[LONG4[0]] = load_long4
   1185 
    def load_float(self):
        """FLOAT opcode: repr() text line."""
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float
   1189 
    def load_binfloat(self):
        """BINFLOAT opcode: 8-byte big-endian IEEE-754 double."""
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat
   1193 
   1194     def _decode_string(self, value):
   1195         # Used to allow strings from Python 2 to be decoded either as
   1196         # bytes or Unicode strings.  This should be used only with the
   1197         # STRING, BINSTRING and SHORT_BINSTRING opcodes.
   1198         if self.encoding == "bytes":
   1199             return value
   1200         else:
   1201             return value.decode(self.encoding, self.errors)
   1202 
   1203     def load_string(self):
   1204         data = self.readline()[:-1]
   1205         # Strip outermost quotes
   1206         if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
   1207             data = data[1:-1]
   1208         else:
   1209             raise UnpicklingError("the STRING opcode argument must be quoted")
   1210         self.append(self._decode_string(codecs.escape_decode(data)[0]))
   1211     dispatch[STRING[0]] = load_string
   1212 
   1213     def load_binstring(self):
   1214         # Deprecated BINSTRING uses signed 32-bit length
   1215         len, = unpack('<i', self.read(4))
   1216         if len < 0:
   1217             raise UnpicklingError("BINSTRING pickle has negative byte count")
   1218         data = self.read(len)
   1219         self.append(self._decode_string(data))
   1220     dispatch[BINSTRING[0]] = load_binstring
   1221 
   1222     def load_binbytes(self):
   1223         len, = unpack('<I', self.read(4))
   1224         if len > maxsize:
   1225             raise UnpicklingError("BINBYTES exceeds system's maximum size "
   1226                                   "of %d bytes" % maxsize)
   1227         self.append(self.read(len))
   1228     dispatch[BINBYTES[0]] = load_binbytes
   1229 
    def load_unicode(self):
        """UNICODE opcode: raw-unicode-escape text line."""
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode
   1233 
   1234     def load_binunicode(self):
   1235         len, = unpack('<I', self.read(4))
   1236         if len > maxsize:
   1237             raise UnpicklingError("BINUNICODE exceeds system's maximum size "
   1238                                   "of %d bytes" % maxsize)
   1239         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
   1240     dispatch[BINUNICODE[0]] = load_binunicode
   1241 
   1242     def load_binunicode8(self):
   1243         len, = unpack('<Q', self.read(8))
   1244         if len > maxsize:
   1245             raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
   1246                                   "of %d bytes" % maxsize)
   1247         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
   1248     dispatch[BINUNICODE8[0]] = load_binunicode8
   1249 
   1250     def load_binbytes8(self):
   1251         len, = unpack('<Q', self.read(8))
   1252         if len > maxsize:
   1253             raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
   1254                                   "of %d bytes" % maxsize)
   1255         self.append(self.read(len))
   1256     dispatch[BINBYTES8[0]] = load_binbytes8
   1257 
   1258     def load_short_binstring(self):
   1259         len = self.read(1)[0]
   1260         data = self.read(len)
   1261         self.append(self._decode_string(data))
   1262     dispatch[SHORT_BINSTRING[0]] = load_short_binstring
   1263 
   1264     def load_short_binbytes(self):
   1265         len = self.read(1)[0]
   1266         self.append(self.read(len))
   1267     dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
   1268 
   1269     def load_short_binunicode(self):
   1270         len = self.read(1)[0]
   1271         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
   1272     dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
   1273 
   1274     def load_tuple(self):
   1275         items = self.pop_mark()
   1276         self.append(tuple(items))
   1277     dispatch[TUPLE[0]] = load_tuple
   1278 
    def load_empty_tuple(self):
        """EMPTY_TUPLE opcode: push ()."""
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
   1282 
    def load_tuple1(self):
        """TUPLE1 opcode: wrap the stack top in a 1-tuple."""
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1[0]] = load_tuple1
   1286 
    def load_tuple2(self):
        """TUPLE2 opcode: replace the top two stack items with a 2-tuple."""
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2[0]] = load_tuple2
   1290 
    def load_tuple3(self):
        """TUPLE3 opcode: replace the top three stack items with a 3-tuple."""
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3[0]] = load_tuple3
   1294 
    def load_empty_list(self):
        """EMPTY_LIST opcode: push a new empty list."""
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list
   1298 
    def load_empty_dictionary(self):
        """EMPTY_DICT opcode: push a new empty dict."""
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary
   1302 
    def load_empty_set(self):
        """EMPTY_SET opcode (protocol 4): push a new empty set."""
        self.append(set())
    dispatch[EMPTY_SET[0]] = load_empty_set
   1306 
   1307     def load_frozenset(self):
   1308         items = self.pop_mark()
   1309         self.append(frozenset(items))
   1310     dispatch[FROZENSET[0]] = load_frozenset
   1311 
   1312     def load_list(self):
   1313         items = self.pop_mark()
   1314         self.append(items)
   1315     dispatch[LIST[0]] = load_list
   1316 
   1317     def load_dict(self):
   1318         items = self.pop_mark()
   1319         d = {items[i]: items[i+1]
   1320              for i in range(0, len(items), 2)}
   1321         self.append(d)
   1322     dispatch[DICT[0]] = load_dict
   1323 
   1324     # INST and OBJ differ only in how they get a class object.  It's not
   1325     # only sensible to do the rest in a common routine, the two routines
   1326     # previously diverged and grew different bugs.
   1327     # klass is the class to instantiate, and k points to the topmost mark
   1328     # object, following which are the arguments for klass.__init__.
   1329     def _instantiate(self, klass, args):
   1330         if (args or not isinstance(klass, type) or
   1331             hasattr(klass, "__getinitargs__")):
   1332             try:
   1333                 value = klass(*args)
   1334             except TypeError as err:
   1335                 raise TypeError("in constructor for %s: %s" %
   1336                                 (klass.__name__, str(err)), sys.exc_info()[2])
   1337         else:
   1338             value = klass.__new__(klass)
   1339         self.append(value)
   1340 
   1341     def load_inst(self):
   1342         module = self.readline()[:-1].decode("ascii")
   1343         name = self.readline()[:-1].decode("ascii")
   1344         klass = self.find_class(module, name)
   1345         self._instantiate(klass, self.pop_mark())
   1346     dispatch[INST[0]] = load_inst
   1347 
   1348     def load_obj(self):
   1349         # Stack is ... markobject classobject arg1 arg2 ...
   1350         args = self.pop_mark()
   1351         cls = args.pop(0)
   1352         self._instantiate(cls, args)
   1353     dispatch[OBJ[0]] = load_obj
   1354 
   1355     def load_newobj(self):
   1356         args = self.stack.pop()
   1357         cls = self.stack.pop()
   1358         obj = cls.__new__(cls, *args)
   1359         self.append(obj)
   1360     dispatch[NEWOBJ[0]] = load_newobj
   1361 
   1362     def load_newobj_ex(self):
   1363         kwargs = self.stack.pop()
   1364         args = self.stack.pop()
   1365         cls = self.stack.pop()
   1366         obj = cls.__new__(cls, *args, **kwargs)
   1367         self.append(obj)
   1368     dispatch[NEWOBJ_EX[0]] = load_newobj_ex
   1369 
   1370     def load_global(self):
   1371         module = self.readline()[:-1].decode("utf-8")
   1372         name = self.readline()[:-1].decode("utf-8")
   1373         klass = self.find_class(module, name)
   1374         self.append(klass)
   1375     dispatch[GLOBAL[0]] = load_global
   1376 
   1377     def load_stack_global(self):
   1378         name = self.stack.pop()
   1379         module = self.stack.pop()
   1380         if type(name) is not str or type(module) is not str:
   1381             raise UnpicklingError("STACK_GLOBAL requires str")
   1382         self.append(self.find_class(module, name))
   1383     dispatch[STACK_GLOBAL[0]] = load_stack_global
   1384 
   1385     def load_ext1(self):
   1386         code = self.read(1)[0]
   1387         self.get_extension(code)
   1388     dispatch[EXT1[0]] = load_ext1
   1389 
   1390     def load_ext2(self):
   1391         code, = unpack('<H', self.read(2))
   1392         self.get_extension(code)
   1393     dispatch[EXT2[0]] = load_ext2
   1394 
   1395     def load_ext4(self):
   1396         code, = unpack('<i', self.read(4))
   1397         self.get_extension(code)
   1398     dispatch[EXT4[0]] = load_ext4
   1399 
   1400     def get_extension(self, code):
   1401         nil = []
   1402         obj = _extension_cache.get(code, nil)
   1403         if obj is not nil:
   1404             self.append(obj)
   1405             return
   1406         key = _inverted_registry.get(code)
   1407         if not key:
   1408             if code <= 0: # note that 0 is forbidden
   1409                 # Corrupt or hostile pickle.
   1410                 raise UnpicklingError("EXT specifies code <= 0")
   1411             raise ValueError("unregistered extension code %d" % code)
   1412         obj = self.find_class(*key)
   1413         _extension_cache[code] = obj
   1414         self.append(obj)
   1415 
   1416     def find_class(self, module, name):
   1417         # Subclasses may override this.
   1418         if self.proto < 3 and self.fix_imports:
   1419             if (module, name) in _compat_pickle.NAME_MAPPING:
   1420                 module, name = _compat_pickle.NAME_MAPPING[(module, name)]
   1421             elif module in _compat_pickle.IMPORT_MAPPING:
   1422                 module = _compat_pickle.IMPORT_MAPPING[module]
   1423         __import__(module, level=0)
   1424         if self.proto >= 4:
   1425             return _getattribute(sys.modules[module], name)[0]
   1426         else:
   1427             return getattr(sys.modules[module], name)
   1428 
   1429     def load_reduce(self):
   1430         stack = self.stack
   1431         args = stack.pop()
   1432         func = stack[-1]
   1433         stack[-1] = func(*args)
   1434     dispatch[REDUCE[0]] = load_reduce
   1435 
   1436     def load_pop(self):
   1437         if self.stack:
   1438             del self.stack[-1]
   1439         else:
   1440             self.pop_mark()
   1441     dispatch[POP[0]] = load_pop
   1442 
    def load_pop_mark(self):
        """POP_MARK opcode: discard everything above the last MARK."""
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark
   1446 
    def load_dup(self):
        """DUP opcode: push a second reference to the stack top."""
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup
   1450 
   1451     def load_get(self):
   1452         i = int(self.readline()[:-1])
   1453         self.append(self.memo[i])
   1454     dispatch[GET[0]] = load_get
   1455 
   1456     def load_binget(self):
   1457         i = self.read(1)[0]
   1458         self.append(self.memo[i])
   1459     dispatch[BINGET[0]] = load_binget
   1460 
   1461     def load_long_binget(self):
   1462         i, = unpack('<I', self.read(4))
   1463         self.append(self.memo[i])
   1464     dispatch[LONG_BINGET[0]] = load_long_binget
   1465 
   1466     def load_put(self):
   1467         i = int(self.readline()[:-1])
   1468         if i < 0:
   1469             raise ValueError("negative PUT argument")
   1470         self.memo[i] = self.stack[-1]
   1471     dispatch[PUT[0]] = load_put
   1472 
   1473     def load_binput(self):
   1474         i = self.read(1)[0]
   1475         if i < 0:
   1476             raise ValueError("negative BINPUT argument")
   1477         self.memo[i] = self.stack[-1]
   1478     dispatch[BINPUT[0]] = load_binput
   1479 
   1480     def load_long_binput(self):
   1481         i, = unpack('<I', self.read(4))
   1482         if i > maxsize:
   1483             raise ValueError("negative LONG_BINPUT argument")
   1484         self.memo[i] = self.stack[-1]
   1485     dispatch[LONG_BINPUT[0]] = load_long_binput
   1486 
   1487     def load_memoize(self):
   1488         memo = self.memo
   1489         memo[len(memo)] = self.stack[-1]
   1490     dispatch[MEMOIZE[0]] = load_memoize
   1491 
   1492     def load_append(self):
   1493         stack = self.stack
   1494         value = stack.pop()
   1495         list = stack[-1]
   1496         list.append(value)
   1497     dispatch[APPEND[0]] = load_append
   1498 
   1499     def load_appends(self):
   1500         items = self.pop_mark()
   1501         list_obj = self.stack[-1]
   1502         try:
   1503             extend = list_obj.extend
   1504         except AttributeError:
   1505             pass
   1506         else:
   1507             extend(items)
   1508             return
   1509         # Even if the PEP 307 requires extend() and append() methods,
   1510         # fall back on append() if the object has no extend() method
   1511         # for backward compatibility.
   1512         append = list_obj.append
   1513         for item in items:
   1514             append(item)
   1515     dispatch[APPENDS[0]] = load_appends
   1516 
   1517     def load_setitem(self):
   1518         stack = self.stack
   1519         value = stack.pop()
   1520         key = stack.pop()
   1521         dict = stack[-1]
   1522         dict[key] = value
   1523     dispatch[SETITEM[0]] = load_setitem
   1524 
   1525     def load_setitems(self):
   1526         items = self.pop_mark()
   1527         dict = self.stack[-1]
   1528         for i in range(0, len(items), 2):
   1529             dict[items[i]] = items[i + 1]
   1530     dispatch[SETITEMS[0]] = load_setitems
   1531 
   1532     def load_additems(self):
   1533         items = self.pop_mark()
   1534         set_obj = self.stack[-1]
   1535         if isinstance(set_obj, set):
   1536             set_obj.update(items)
   1537         else:
   1538             add = set_obj.add
   1539             for item in items:
   1540                 add(item)
   1541     dispatch[ADDITEMS[0]] = load_additems
   1542 
   1543     def load_build(self):
   1544         stack = self.stack
   1545         state = stack.pop()
   1546         inst = stack[-1]
   1547         setstate = getattr(inst, "__setstate__", None)
   1548         if setstate is not None:
   1549             setstate(state)
   1550             return
   1551         slotstate = None
   1552         if isinstance(state, tuple) and len(state) == 2:
   1553             state, slotstate = state
   1554         if state:
   1555             inst_dict = inst.__dict__
   1556             intern = sys.intern
   1557             for k, v in state.items():
   1558                 if type(k) is str:
   1559                     inst_dict[intern(k)] = v
   1560                 else:
   1561                     inst_dict[k] = v
   1562         if slotstate:
   1563             for k, v in slotstate.items():
   1564                 setattr(inst, k, v)
   1565     dispatch[BUILD[0]] = load_build
   1566 
   1567     def load_mark(self):
   1568         self.metastack.append(self.stack)
   1569         self.stack = []
   1570         self.append = self.stack.append
   1571     dispatch[MARK[0]] = load_mark
   1572 
   1573     def load_stop(self):
   1574         value = self.stack.pop()
   1575         raise _Stop(value)
   1576     dispatch[STOP[0]] = load_stop
   1577 
   1578 
   1579 # Shorthands
   1580 
   1581 def _dump(obj, file, protocol=None, *, fix_imports=True):
   1582     _Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
   1583 
   1584 def _dumps(obj, protocol=None, *, fix_imports=True):
   1585     f = io.BytesIO()
   1586     _Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
   1587     res = f.getvalue()
   1588     assert isinstance(res, bytes_types)
   1589     return res
   1590 
   1591 def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
   1592     return _Unpickler(file, fix_imports=fix_imports,
   1593                      encoding=encoding, errors=errors).load()
   1594 
   1595 def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
   1596     if isinstance(s, str):
   1597         raise TypeError("Can't load pickle from unicode string")
   1598     file = io.BytesIO(s)
   1599     return _Unpickler(file, fix_imports=fix_imports,
   1600                       encoding=encoding, errors=errors).load()
   1601 
# Use the faster _pickle if possible
try:
    # Prefer the C accelerator module; it exposes the same public API
    # as the pure-Python implementation above.
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # No C extension available: fall back on the pure-Python
    # implementations defined in this module.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
   1618 
   1619 # Doctest
   1620 def _test():
   1621     import doctest
   1622     return doctest.testmod()
   1623 
   1624 if __name__ == "__main__":
   1625     import argparse
   1626     parser = argparse.ArgumentParser(
   1627         description='display contents of the pickle files')
   1628     parser.add_argument(
   1629         'pickle_file', type=argparse.FileType('br'),
   1630         nargs='*', help='the pickle file')
   1631     parser.add_argument(
   1632         '-t', '--test', action='store_true',
   1633         help='run self-test suite')
   1634     parser.add_argument(
   1635         '-v', action='store_true',
   1636         help='run verbosely; only affects self-test run')
   1637     args = parser.parse_args()
   1638     if args.test:
   1639         _test()
   1640     else:
   1641         if not args.pickle_file:
   1642             parser.print_help()
   1643         else:
   1644             import pprint
   1645             for f in args.pickle_file:
   1646                 obj = load(f)
   1647                 pprint.pprint(obj)
   1648