1 #include "Python.h" 2 #include "structmember.h" 3 4 PyDoc_STRVAR(pickle_module_doc, 5 "Optimized C implementation for the Python pickle module."); 6 7 /*[clinic input] 8 module _pickle 9 class _pickle.Pickler "PicklerObject *" "&Pickler_Type" 10 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType" 11 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type" 12 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType" 13 [clinic start generated code]*/ 14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/ 15 16 /* Bump this when new opcodes are added to the pickle protocol. */ 17 enum { 18 HIGHEST_PROTOCOL = 4, 19 DEFAULT_PROTOCOL = 3 20 }; 21 22 /* Pickle opcodes. These must be kept updated with pickle.py. 23 Extensive docs are in pickletools.py. */ 24 enum opcode { 25 MARK = '(', 26 STOP = '.', 27 POP = '0', 28 POP_MARK = '1', 29 DUP = '2', 30 FLOAT = 'F', 31 INT = 'I', 32 BININT = 'J', 33 BININT1 = 'K', 34 LONG = 'L', 35 BININT2 = 'M', 36 NONE = 'N', 37 PERSID = 'P', 38 BINPERSID = 'Q', 39 REDUCE = 'R', 40 STRING = 'S', 41 BINSTRING = 'T', 42 SHORT_BINSTRING = 'U', 43 UNICODE = 'V', 44 BINUNICODE = 'X', 45 APPEND = 'a', 46 BUILD = 'b', 47 GLOBAL = 'c', 48 DICT = 'd', 49 EMPTY_DICT = '}', 50 APPENDS = 'e', 51 GET = 'g', 52 BINGET = 'h', 53 INST = 'i', 54 LONG_BINGET = 'j', 55 LIST = 'l', 56 EMPTY_LIST = ']', 57 OBJ = 'o', 58 PUT = 'p', 59 BINPUT = 'q', 60 LONG_BINPUT = 'r', 61 SETITEM = 's', 62 TUPLE = 't', 63 EMPTY_TUPLE = ')', 64 SETITEMS = 'u', 65 BINFLOAT = 'G', 66 67 /* Protocol 2. 
*/ 68 PROTO = '\x80', 69 NEWOBJ = '\x81', 70 EXT1 = '\x82', 71 EXT2 = '\x83', 72 EXT4 = '\x84', 73 TUPLE1 = '\x85', 74 TUPLE2 = '\x86', 75 TUPLE3 = '\x87', 76 NEWTRUE = '\x88', 77 NEWFALSE = '\x89', 78 LONG1 = '\x8a', 79 LONG4 = '\x8b', 80 81 /* Protocol 3 (Python 3.x) */ 82 BINBYTES = 'B', 83 SHORT_BINBYTES = 'C', 84 85 /* Protocol 4 */ 86 SHORT_BINUNICODE = '\x8c', 87 BINUNICODE8 = '\x8d', 88 BINBYTES8 = '\x8e', 89 EMPTY_SET = '\x8f', 90 ADDITEMS = '\x90', 91 FROZENSET = '\x91', 92 NEWOBJ_EX = '\x92', 93 STACK_GLOBAL = '\x93', 94 MEMOIZE = '\x94', 95 FRAME = '\x95' 96 }; 97 98 enum { 99 /* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements 100 batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will 101 break if this gets out of synch with pickle.py, but it's unclear that would 102 help anything either. */ 103 BATCHSIZE = 1000, 104 105 /* Nesting limit until Pickler, when running in "fast mode", starts 106 checking for self-referential data-structures. */ 107 FAST_NESTING_LIMIT = 50, 108 109 /* Initial size of the write buffer of Pickler. */ 110 WRITE_BUF_SIZE = 4096, 111 112 /* Prefetch size when unpickling (disabled on unpeekable streams) */ 113 PREFETCH = 8192 * 16, 114 115 FRAME_SIZE_TARGET = 64 * 1024, 116 117 FRAME_HEADER_SIZE = 9 118 }; 119 120 /*************************************************************************/ 121 122 /* State of the pickle module, per PEP 3121. */ 123 typedef struct { 124 /* Exception classes for pickle. */ 125 PyObject *PickleError; 126 PyObject *PicklingError; 127 PyObject *UnpicklingError; 128 129 /* copyreg.dispatch_table, {type_object: pickling_function} */ 130 PyObject *dispatch_table; 131 132 /* For the extension opcodes EXT1, EXT2 and EXT4. 
*/ 133 134 /* copyreg._extension_registry, {(module_name, function_name): code} */ 135 PyObject *extension_registry; 136 /* copyreg._extension_cache, {code: object} */ 137 PyObject *extension_cache; 138 /* copyreg._inverted_registry, {code: (module_name, function_name)} */ 139 PyObject *inverted_registry; 140 141 /* Import mappings for compatibility with Python 2.x */ 142 143 /* _compat_pickle.NAME_MAPPING, 144 {(oldmodule, oldname): (newmodule, newname)} */ 145 PyObject *name_mapping_2to3; 146 /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ 147 PyObject *import_mapping_2to3; 148 /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */ 149 PyObject *name_mapping_3to2; 150 PyObject *import_mapping_3to2; 151 152 /* codecs.encode, used for saving bytes in older protocols */ 153 PyObject *codecs_encode; 154 /* builtins.getattr, used for saving nested names with protocol < 4 */ 155 PyObject *getattr; 156 /* functools.partial, used for implementing __newobj_ex__ with protocols 157 2 and 3 */ 158 PyObject *partial; 159 } PickleState; 160 161 /* Forward declaration of the _pickle module definition. */ 162 static struct PyModuleDef _picklemodule; 163 164 /* Given a module object, get its per-module state. */ 165 static PickleState * 166 _Pickle_GetState(PyObject *module) 167 { 168 return (PickleState *)PyModule_GetState(module); 169 } 170 171 /* Find the module instance imported in the currently running sub-interpreter 172 and get its state. */ 173 static PickleState * 174 _Pickle_GetGlobalState(void) 175 { 176 return _Pickle_GetState(PyState_FindModule(&_picklemodule)); 177 } 178 179 /* Clear the given pickle module state. 
*/ 180 static void 181 _Pickle_ClearState(PickleState *st) 182 { 183 Py_CLEAR(st->PickleError); 184 Py_CLEAR(st->PicklingError); 185 Py_CLEAR(st->UnpicklingError); 186 Py_CLEAR(st->dispatch_table); 187 Py_CLEAR(st->extension_registry); 188 Py_CLEAR(st->extension_cache); 189 Py_CLEAR(st->inverted_registry); 190 Py_CLEAR(st->name_mapping_2to3); 191 Py_CLEAR(st->import_mapping_2to3); 192 Py_CLEAR(st->name_mapping_3to2); 193 Py_CLEAR(st->import_mapping_3to2); 194 Py_CLEAR(st->codecs_encode); 195 Py_CLEAR(st->getattr); 196 Py_CLEAR(st->partial); 197 } 198 199 /* Initialize the given pickle module state. */ 200 static int 201 _Pickle_InitState(PickleState *st) 202 { 203 PyObject *builtins; 204 PyObject *copyreg = NULL; 205 PyObject *compat_pickle = NULL; 206 PyObject *codecs = NULL; 207 PyObject *functools = NULL; 208 209 builtins = PyEval_GetBuiltins(); 210 if (builtins == NULL) 211 goto error; 212 st->getattr = PyDict_GetItemString(builtins, "getattr"); 213 if (st->getattr == NULL) 214 goto error; 215 Py_INCREF(st->getattr); 216 217 copyreg = PyImport_ImportModule("copyreg"); 218 if (!copyreg) 219 goto error; 220 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); 221 if (!st->dispatch_table) 222 goto error; 223 if (!PyDict_CheckExact(st->dispatch_table)) { 224 PyErr_Format(PyExc_RuntimeError, 225 "copyreg.dispatch_table should be a dict, not %.200s", 226 Py_TYPE(st->dispatch_table)->tp_name); 227 goto error; 228 } 229 st->extension_registry = \ 230 PyObject_GetAttrString(copyreg, "_extension_registry"); 231 if (!st->extension_registry) 232 goto error; 233 if (!PyDict_CheckExact(st->extension_registry)) { 234 PyErr_Format(PyExc_RuntimeError, 235 "copyreg._extension_registry should be a dict, " 236 "not %.200s", Py_TYPE(st->extension_registry)->tp_name); 237 goto error; 238 } 239 st->inverted_registry = \ 240 PyObject_GetAttrString(copyreg, "_inverted_registry"); 241 if (!st->inverted_registry) 242 goto error; 243 if 
(!PyDict_CheckExact(st->inverted_registry)) { 244 PyErr_Format(PyExc_RuntimeError, 245 "copyreg._inverted_registry should be a dict, " 246 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name); 247 goto error; 248 } 249 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); 250 if (!st->extension_cache) 251 goto error; 252 if (!PyDict_CheckExact(st->extension_cache)) { 253 PyErr_Format(PyExc_RuntimeError, 254 "copyreg._extension_cache should be a dict, " 255 "not %.200s", Py_TYPE(st->extension_cache)->tp_name); 256 goto error; 257 } 258 Py_CLEAR(copyreg); 259 260 /* Load the 2.x -> 3.x stdlib module mapping tables */ 261 compat_pickle = PyImport_ImportModule("_compat_pickle"); 262 if (!compat_pickle) 263 goto error; 264 st->name_mapping_2to3 = \ 265 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); 266 if (!st->name_mapping_2to3) 267 goto error; 268 if (!PyDict_CheckExact(st->name_mapping_2to3)) { 269 PyErr_Format(PyExc_RuntimeError, 270 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", 271 Py_TYPE(st->name_mapping_2to3)->tp_name); 272 goto error; 273 } 274 st->import_mapping_2to3 = \ 275 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING"); 276 if (!st->import_mapping_2to3) 277 goto error; 278 if (!PyDict_CheckExact(st->import_mapping_2to3)) { 279 PyErr_Format(PyExc_RuntimeError, 280 "_compat_pickle.IMPORT_MAPPING should be a dict, " 281 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name); 282 goto error; 283 } 284 /* ... 
and the 3.x -> 2.x mapping tables */ 285 st->name_mapping_3to2 = \ 286 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING"); 287 if (!st->name_mapping_3to2) 288 goto error; 289 if (!PyDict_CheckExact(st->name_mapping_3to2)) { 290 PyErr_Format(PyExc_RuntimeError, 291 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, " 292 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name); 293 goto error; 294 } 295 st->import_mapping_3to2 = \ 296 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING"); 297 if (!st->import_mapping_3to2) 298 goto error; 299 if (!PyDict_CheckExact(st->import_mapping_3to2)) { 300 PyErr_Format(PyExc_RuntimeError, 301 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, " 302 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name); 303 goto error; 304 } 305 Py_CLEAR(compat_pickle); 306 307 codecs = PyImport_ImportModule("codecs"); 308 if (codecs == NULL) 309 goto error; 310 st->codecs_encode = PyObject_GetAttrString(codecs, "encode"); 311 if (st->codecs_encode == NULL) { 312 goto error; 313 } 314 if (!PyCallable_Check(st->codecs_encode)) { 315 PyErr_Format(PyExc_RuntimeError, 316 "codecs.encode should be a callable, not %.200s", 317 Py_TYPE(st->codecs_encode)->tp_name); 318 goto error; 319 } 320 Py_CLEAR(codecs); 321 322 functools = PyImport_ImportModule("functools"); 323 if (!functools) 324 goto error; 325 st->partial = PyObject_GetAttrString(functools, "partial"); 326 if (!st->partial) 327 goto error; 328 Py_CLEAR(functools); 329 330 return 0; 331 332 error: 333 Py_CLEAR(copyreg); 334 Py_CLEAR(compat_pickle); 335 Py_CLEAR(codecs); 336 Py_CLEAR(functools); 337 _Pickle_ClearState(st); 338 return -1; 339 } 340 341 /* Helper for calling a function with a single argument quickly. 342 343 This function steals the reference of the given argument. 
*/ 344 static PyObject * 345 _Pickle_FastCall(PyObject *func, PyObject *obj) 346 { 347 PyObject *result; 348 349 result = _PyObject_CallArg1(func, obj); 350 Py_DECREF(obj); 351 return result; 352 } 353 354 /*************************************************************************/ 355 356 /* Internal data type used as the unpickling stack. */ 357 typedef struct { 358 PyObject_VAR_HEAD 359 PyObject **data; 360 int mark_set; /* is MARK set? */ 361 Py_ssize_t fence; /* position of top MARK or 0 */ 362 Py_ssize_t allocated; /* number of slots in data allocated */ 363 } Pdata; 364 365 static void 366 Pdata_dealloc(Pdata *self) 367 { 368 Py_ssize_t i = Py_SIZE(self); 369 while (--i >= 0) { 370 Py_DECREF(self->data[i]); 371 } 372 PyMem_FREE(self->data); 373 PyObject_Del(self); 374 } 375 376 static PyTypeObject Pdata_Type = { 377 PyVarObject_HEAD_INIT(NULL, 0) 378 "_pickle.Pdata", /*tp_name*/ 379 sizeof(Pdata), /*tp_basicsize*/ 380 sizeof(PyObject *), /*tp_itemsize*/ 381 (destructor)Pdata_dealloc, /*tp_dealloc*/ 382 }; 383 384 static PyObject * 385 Pdata_New(void) 386 { 387 Pdata *self; 388 389 if (!(self = PyObject_New(Pdata, &Pdata_Type))) 390 return NULL; 391 Py_SIZE(self) = 0; 392 self->mark_set = 0; 393 self->fence = 0; 394 self->allocated = 8; 395 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *)); 396 if (self->data) 397 return (PyObject *)self; 398 Py_DECREF(self); 399 return PyErr_NoMemory(); 400 } 401 402 403 /* Retain only the initial clearto items. If clearto >= the current 404 * number of items, this is a (non-erroneous) NOP. 
405 */ 406 static int 407 Pdata_clear(Pdata *self, Py_ssize_t clearto) 408 { 409 Py_ssize_t i = Py_SIZE(self); 410 411 assert(clearto >= self->fence); 412 if (clearto >= i) 413 return 0; 414 415 while (--i >= clearto) { 416 Py_CLEAR(self->data[i]); 417 } 418 Py_SIZE(self) = clearto; 419 return 0; 420 } 421 422 static int 423 Pdata_grow(Pdata *self) 424 { 425 PyObject **data = self->data; 426 size_t allocated = (size_t)self->allocated; 427 size_t new_allocated; 428 429 new_allocated = (allocated >> 3) + 6; 430 /* check for integer overflow */ 431 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated) 432 goto nomemory; 433 new_allocated += allocated; 434 PyMem_RESIZE(data, PyObject *, new_allocated); 435 if (data == NULL) 436 goto nomemory; 437 438 self->data = data; 439 self->allocated = (Py_ssize_t)new_allocated; 440 return 0; 441 442 nomemory: 443 PyErr_NoMemory(); 444 return -1; 445 } 446 447 static int 448 Pdata_stack_underflow(Pdata *self) 449 { 450 PickleState *st = _Pickle_GetGlobalState(); 451 PyErr_SetString(st->UnpicklingError, 452 self->mark_set ? 453 "unexpected MARK found" : 454 "unpickling stack underflow"); 455 return -1; 456 } 457 458 /* D is a Pdata*. Pop the topmost element and store it into V, which 459 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError 460 * is raised and V is set to NULL. 461 */ 462 static PyObject * 463 Pdata_pop(Pdata *self) 464 { 465 if (Py_SIZE(self) <= self->fence) { 466 Pdata_stack_underflow(self); 467 return NULL; 468 } 469 return self->data[--Py_SIZE(self)]; 470 } 471 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0) 472 473 static int 474 Pdata_push(Pdata *self, PyObject *obj) 475 { 476 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) { 477 return -1; 478 } 479 self->data[Py_SIZE(self)++] = obj; 480 return 0; 481 } 482 483 /* Push an object on stack, transferring its ownership to the stack. 
*/ 484 #define PDATA_PUSH(D, O, ER) do { \ 485 if (Pdata_push((D), (O)) < 0) return (ER); } while(0) 486 487 /* Push an object on stack, adding a new reference to the object. */ 488 #define PDATA_APPEND(D, O, ER) do { \ 489 Py_INCREF((O)); \ 490 if (Pdata_push((D), (O)) < 0) return (ER); } while(0) 491 492 static PyObject * 493 Pdata_poptuple(Pdata *self, Py_ssize_t start) 494 { 495 PyObject *tuple; 496 Py_ssize_t len, i, j; 497 498 if (start < self->fence) { 499 Pdata_stack_underflow(self); 500 return NULL; 501 } 502 len = Py_SIZE(self) - start; 503 tuple = PyTuple_New(len); 504 if (tuple == NULL) 505 return NULL; 506 for (i = start, j = 0; j < len; i++, j++) 507 PyTuple_SET_ITEM(tuple, j, self->data[i]); 508 509 Py_SIZE(self) = start; 510 return tuple; 511 } 512 513 static PyObject * 514 Pdata_poplist(Pdata *self, Py_ssize_t start) 515 { 516 PyObject *list; 517 Py_ssize_t len, i, j; 518 519 len = Py_SIZE(self) - start; 520 list = PyList_New(len); 521 if (list == NULL) 522 return NULL; 523 for (i = start, j = 0; j < len; i++, j++) 524 PyList_SET_ITEM(list, j, self->data[i]); 525 526 Py_SIZE(self) = start; 527 return list; 528 } 529 530 typedef struct { 531 PyObject *me_key; 532 Py_ssize_t me_value; 533 } PyMemoEntry; 534 535 typedef struct { 536 Py_ssize_t mt_mask; 537 Py_ssize_t mt_used; 538 Py_ssize_t mt_allocated; 539 PyMemoEntry *mt_table; 540 } PyMemoTable; 541 542 typedef struct PicklerObject { 543 PyObject_HEAD 544 PyMemoTable *memo; /* Memo table, keep track of the seen 545 objects to support self-referential objects 546 pickling. */ 547 PyObject *pers_func; /* persistent_id() method, can be NULL */ 548 PyObject *dispatch_table; /* private dispatch_table, can be NULL */ 549 550 PyObject *write; /* write() method of the output stream. */ 551 PyObject *output_buffer; /* Write into a local bytearray buffer before 552 flushing to the stream. */ 553 Py_ssize_t output_len; /* Length of output_buffer. 
*/ 554 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */ 555 int proto; /* Pickle protocol number, >= 0 */ 556 int bin; /* Boolean, true if proto > 0 */ 557 int framing; /* True when framing is enabled, proto >= 4 */ 558 Py_ssize_t frame_start; /* Position in output_buffer where the 559 current frame begins. -1 if there 560 is no frame currently open. */ 561 562 Py_ssize_t buf_size; /* Size of the current buffered pickle data */ 563 int fast; /* Enable fast mode if set to a true value. 564 The fast mode disable the usage of memo, 565 therefore speeding the pickling process by 566 not generating superfluous PUT opcodes. It 567 should not be used if with self-referential 568 objects. */ 569 int fast_nesting; 570 int fix_imports; /* Indicate whether Pickler should fix 571 the name of globals for Python 2.x. */ 572 PyObject *fast_memo; 573 } PicklerObject; 574 575 typedef struct UnpicklerObject { 576 PyObject_HEAD 577 Pdata *stack; /* Pickle data stack, store unpickled objects. */ 578 579 /* The unpickler memo is just an array of PyObject *s. Using a dict 580 is unnecessary, since the keys are contiguous ints. */ 581 PyObject **memo; 582 Py_ssize_t memo_size; /* Capacity of the memo array */ 583 Py_ssize_t memo_len; /* Number of objects in the memo */ 584 585 PyObject *pers_func; /* persistent_load() method, can be NULL. */ 586 587 Py_buffer buffer; 588 char *input_buffer; 589 char *input_line; 590 Py_ssize_t input_len; 591 Py_ssize_t next_read_idx; 592 Py_ssize_t prefetched_idx; /* index of first prefetched byte */ 593 594 PyObject *read; /* read() method of the input stream. */ 595 PyObject *readline; /* readline() method of the input stream. */ 596 PyObject *peek; /* peek() method of the input stream, or NULL */ 597 598 char *encoding; /* Name of the encoding to be used for 599 decoding strings pickled using Python 600 2.x. The default value is "ASCII" */ 601 char *errors; /* Name of errors handling scheme to used when 602 decoding strings. 
The default value is 603 "strict". */ 604 Py_ssize_t *marks; /* Mark stack, used for unpickling container 605 objects. */ 606 Py_ssize_t num_marks; /* Number of marks in the mark stack. */ 607 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ 608 int proto; /* Protocol of the pickle loaded. */ 609 int fix_imports; /* Indicate whether Unpickler should fix 610 the name of globals pickled by Python 2.x. */ 611 } UnpicklerObject; 612 613 typedef struct { 614 PyObject_HEAD 615 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */ 616 } PicklerMemoProxyObject; 617 618 typedef struct { 619 PyObject_HEAD 620 UnpicklerObject *unpickler; 621 } UnpicklerMemoProxyObject; 622 623 /* Forward declarations */ 624 static int save(PicklerObject *, PyObject *, int); 625 static int save_reduce(PicklerObject *, PyObject *, PyObject *); 626 static PyTypeObject Pickler_Type; 627 static PyTypeObject Unpickler_Type; 628 629 #include "clinic/_pickle.c.h" 630 631 /************************************************************************* 632 A custom hashtable mapping void* to Python ints. This is used by the pickler 633 for memoization. Using a custom hashtable rather than PyDict allows us to skip 634 a bunch of unnecessary object creation. This makes a huge performance 635 difference. 
*/ 636 637 #define MT_MINSIZE 8 638 #define PERTURB_SHIFT 5 639 640 641 static PyMemoTable * 642 PyMemoTable_New(void) 643 { 644 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable)); 645 if (memo == NULL) { 646 PyErr_NoMemory(); 647 return NULL; 648 } 649 650 memo->mt_used = 0; 651 memo->mt_allocated = MT_MINSIZE; 652 memo->mt_mask = MT_MINSIZE - 1; 653 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry)); 654 if (memo->mt_table == NULL) { 655 PyMem_FREE(memo); 656 PyErr_NoMemory(); 657 return NULL; 658 } 659 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry)); 660 661 return memo; 662 } 663 664 static PyMemoTable * 665 PyMemoTable_Copy(PyMemoTable *self) 666 { 667 Py_ssize_t i; 668 PyMemoTable *new = PyMemoTable_New(); 669 if (new == NULL) 670 return NULL; 671 672 new->mt_used = self->mt_used; 673 new->mt_allocated = self->mt_allocated; 674 new->mt_mask = self->mt_mask; 675 /* The table we get from _New() is probably smaller than we wanted. 676 Free it and allocate one that's the right size. 
*/ 677 PyMem_FREE(new->mt_table); 678 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated); 679 if (new->mt_table == NULL) { 680 PyMem_FREE(new); 681 PyErr_NoMemory(); 682 return NULL; 683 } 684 for (i = 0; i < self->mt_allocated; i++) { 685 Py_XINCREF(self->mt_table[i].me_key); 686 } 687 memcpy(new->mt_table, self->mt_table, 688 sizeof(PyMemoEntry) * self->mt_allocated); 689 690 return new; 691 } 692 693 static Py_ssize_t 694 PyMemoTable_Size(PyMemoTable *self) 695 { 696 return self->mt_used; 697 } 698 699 static int 700 PyMemoTable_Clear(PyMemoTable *self) 701 { 702 Py_ssize_t i = self->mt_allocated; 703 704 while (--i >= 0) { 705 Py_XDECREF(self->mt_table[i].me_key); 706 } 707 self->mt_used = 0; 708 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry)); 709 return 0; 710 } 711 712 static void 713 PyMemoTable_Del(PyMemoTable *self) 714 { 715 if (self == NULL) 716 return; 717 PyMemoTable_Clear(self); 718 719 PyMem_FREE(self->mt_table); 720 PyMem_FREE(self); 721 } 722 723 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup() 724 can be considerably simpler than dictobject.c's lookdict(). */ 725 static PyMemoEntry * 726 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key) 727 { 728 size_t i; 729 size_t perturb; 730 size_t mask = (size_t)self->mt_mask; 731 PyMemoEntry *table = self->mt_table; 732 PyMemoEntry *entry; 733 Py_hash_t hash = (Py_hash_t)key >> 3; 734 735 i = hash & mask; 736 entry = &table[i]; 737 if (entry->me_key == NULL || entry->me_key == key) 738 return entry; 739 740 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { 741 i = (i << 2) + i + perturb + 1; 742 entry = &table[i & mask]; 743 if (entry->me_key == NULL || entry->me_key == key) 744 return entry; 745 } 746 assert(0); /* Never reached */ 747 return NULL; 748 } 749 750 /* Returns -1 on failure, 0 on success. 
*/ 751 static int 752 _PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size) 753 { 754 PyMemoEntry *oldtable = NULL; 755 PyMemoEntry *oldentry, *newentry; 756 Py_ssize_t new_size = MT_MINSIZE; 757 Py_ssize_t to_process; 758 759 assert(min_size > 0); 760 761 /* Find the smallest valid table size >= min_size. */ 762 while (new_size < min_size && new_size > 0) 763 new_size <<= 1; 764 if (new_size <= 0) { 765 PyErr_NoMemory(); 766 return -1; 767 } 768 /* new_size needs to be a power of two. */ 769 assert((new_size & (new_size - 1)) == 0); 770 771 /* Allocate new table. */ 772 oldtable = self->mt_table; 773 self->mt_table = PyMem_NEW(PyMemoEntry, new_size); 774 if (self->mt_table == NULL) { 775 self->mt_table = oldtable; 776 PyErr_NoMemory(); 777 return -1; 778 } 779 self->mt_allocated = new_size; 780 self->mt_mask = new_size - 1; 781 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size); 782 783 /* Copy entries from the old table. */ 784 to_process = self->mt_used; 785 for (oldentry = oldtable; to_process > 0; oldentry++) { 786 if (oldentry->me_key != NULL) { 787 to_process--; 788 /* newentry is a pointer to a chunk of the new 789 mt_table, so we're setting the key:value pair 790 in-place. */ 791 newentry = _PyMemoTable_Lookup(self, oldentry->me_key); 792 newentry->me_key = oldentry->me_key; 793 newentry->me_value = oldentry->me_value; 794 } 795 } 796 797 /* Deallocate the old table. */ 798 PyMem_FREE(oldtable); 799 return 0; 800 } 801 802 /* Returns NULL on failure, a pointer to the value otherwise. */ 803 static Py_ssize_t * 804 PyMemoTable_Get(PyMemoTable *self, PyObject *key) 805 { 806 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key); 807 if (entry->me_key == NULL) 808 return NULL; 809 return &entry->me_value; 810 } 811 812 /* Returns -1 on failure, 0 on success. 
*/ 813 static int 814 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) 815 { 816 PyMemoEntry *entry; 817 818 assert(key != NULL); 819 820 entry = _PyMemoTable_Lookup(self, key); 821 if (entry->me_key != NULL) { 822 entry->me_value = value; 823 return 0; 824 } 825 Py_INCREF(key); 826 entry->me_key = key; 827 entry->me_value = value; 828 self->mt_used++; 829 830 /* If we added a key, we can safely resize. Otherwise just return! 831 * If used >= 2/3 size, adjust size. Normally, this quaduples the size. 832 * 833 * Quadrupling the size improves average table sparseness 834 * (reducing collisions) at the cost of some memory. It also halves 835 * the number of expensive resize operations in a growing memo table. 836 * 837 * Very large memo tables (over 50K items) use doubling instead. 838 * This may help applications with severe memory constraints. 839 */ 840 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2)) 841 return 0; 842 return _PyMemoTable_ResizeTable(self, 843 (self->mt_used > 50000 ? 
2 : 4) * self->mt_used); 844 } 845 846 #undef MT_MINSIZE 847 #undef PERTURB_SHIFT 848 849 /*************************************************************************/ 850 851 852 static int 853 _Pickler_ClearBuffer(PicklerObject *self) 854 { 855 Py_XSETREF(self->output_buffer, 856 PyBytes_FromStringAndSize(NULL, self->max_output_len)); 857 if (self->output_buffer == NULL) 858 return -1; 859 self->output_len = 0; 860 self->frame_start = -1; 861 return 0; 862 } 863 864 static void 865 _write_size64(char *out, size_t value) 866 { 867 size_t i; 868 869 Py_BUILD_ASSERT(sizeof(size_t) <= 8); 870 871 for (i = 0; i < sizeof(size_t); i++) { 872 out[i] = (unsigned char)((value >> (8 * i)) & 0xff); 873 } 874 for (i = sizeof(size_t); i < 8; i++) { 875 out[i] = 0; 876 } 877 } 878 879 static void 880 _Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len) 881 { 882 qdata[0] = FRAME; 883 _write_size64(qdata + 1, frame_len); 884 } 885 886 static int 887 _Pickler_CommitFrame(PicklerObject *self) 888 { 889 size_t frame_len; 890 char *qdata; 891 892 if (!self->framing || self->frame_start == -1) 893 return 0; 894 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE; 895 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start; 896 _Pickler_WriteFrameHeader(self, qdata, frame_len); 897 self->frame_start = -1; 898 return 0; 899 } 900 901 static int 902 _Pickler_OpcodeBoundary(PicklerObject *self) 903 { 904 Py_ssize_t frame_len; 905 906 if (!self->framing || self->frame_start == -1) 907 return 0; 908 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE; 909 if (frame_len >= FRAME_SIZE_TARGET) 910 return _Pickler_CommitFrame(self); 911 else 912 return 0; 913 } 914 915 static PyObject * 916 _Pickler_GetString(PicklerObject *self) 917 { 918 PyObject *output_buffer = self->output_buffer; 919 920 assert(self->output_buffer != NULL); 921 922 if (_Pickler_CommitFrame(self)) 923 return NULL; 924 925 self->output_buffer = NULL; 926 
/* Resize down to exact size */ 927 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0) 928 return NULL; 929 return output_buffer; 930 } 931 932 static int 933 _Pickler_FlushToFile(PicklerObject *self) 934 { 935 PyObject *output, *result; 936 937 assert(self->write != NULL); 938 939 /* This will commit the frame first */ 940 output = _Pickler_GetString(self); 941 if (output == NULL) 942 return -1; 943 944 result = _Pickle_FastCall(self->write, output); 945 Py_XDECREF(result); 946 return (result == NULL) ? -1 : 0; 947 } 948 949 static Py_ssize_t 950 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len) 951 { 952 Py_ssize_t i, n, required; 953 char *buffer; 954 int need_new_frame; 955 956 assert(s != NULL); 957 need_new_frame = (self->framing && self->frame_start == -1); 958 959 if (need_new_frame) 960 n = data_len + FRAME_HEADER_SIZE; 961 else 962 n = data_len; 963 964 required = self->output_len + n; 965 if (required > self->max_output_len) { 966 /* Make place in buffer for the pickle chunk */ 967 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) { 968 PyErr_NoMemory(); 969 return -1; 970 } 971 self->max_output_len = (self->output_len + n) / 2 * 3; 972 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0) 973 return -1; 974 } 975 buffer = PyBytes_AS_STRING(self->output_buffer); 976 if (need_new_frame) { 977 /* Setup new frame */ 978 Py_ssize_t frame_start = self->output_len; 979 self->frame_start = frame_start; 980 for (i = 0; i < FRAME_HEADER_SIZE; i++) { 981 /* Write an invalid value, for debugging */ 982 buffer[frame_start + i] = 0xFE; 983 } 984 self->output_len += FRAME_HEADER_SIZE; 985 } 986 if (data_len < 8) { 987 /* This is faster than memcpy when the string is short. 
*/ 988 for (i = 0; i < data_len; i++) { 989 buffer[self->output_len + i] = s[i]; 990 } 991 } 992 else { 993 memcpy(buffer + self->output_len, s, data_len); 994 } 995 self->output_len += data_len; 996 return data_len; 997 } 998 999 static PicklerObject * 1000 _Pickler_New(void) 1001 { 1002 PicklerObject *self; 1003 1004 self = PyObject_GC_New(PicklerObject, &Pickler_Type); 1005 if (self == NULL) 1006 return NULL; 1007 1008 self->pers_func = NULL; 1009 self->dispatch_table = NULL; 1010 self->write = NULL; 1011 self->proto = 0; 1012 self->bin = 0; 1013 self->framing = 0; 1014 self->frame_start = -1; 1015 self->fast = 0; 1016 self->fast_nesting = 0; 1017 self->fix_imports = 0; 1018 self->fast_memo = NULL; 1019 self->max_output_len = WRITE_BUF_SIZE; 1020 self->output_len = 0; 1021 1022 self->memo = PyMemoTable_New(); 1023 self->output_buffer = PyBytes_FromStringAndSize(NULL, 1024 self->max_output_len); 1025 1026 if (self->memo == NULL || self->output_buffer == NULL) { 1027 Py_DECREF(self); 1028 return NULL; 1029 } 1030 return self; 1031 } 1032 1033 static int 1034 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports) 1035 { 1036 long proto; 1037 1038 if (protocol == NULL || protocol == Py_None) { 1039 proto = DEFAULT_PROTOCOL; 1040 } 1041 else { 1042 proto = PyLong_AsLong(protocol); 1043 if (proto < 0) { 1044 if (proto == -1 && PyErr_Occurred()) 1045 return -1; 1046 proto = HIGHEST_PROTOCOL; 1047 } 1048 else if (proto > HIGHEST_PROTOCOL) { 1049 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d", 1050 HIGHEST_PROTOCOL); 1051 return -1; 1052 } 1053 } 1054 self->proto = (int)proto; 1055 self->bin = proto > 0; 1056 self->fix_imports = fix_imports && proto < 3; 1057 return 0; 1058 } 1059 1060 /* Returns -1 (with an exception set) on failure, 0 on success. This may 1061 be called once on a freshly created Pickler. 
*/ 1062 static int 1063 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file) 1064 { 1065 _Py_IDENTIFIER(write); 1066 assert(file != NULL); 1067 self->write = _PyObject_GetAttrId(file, &PyId_write); 1068 if (self->write == NULL) { 1069 if (PyErr_ExceptionMatches(PyExc_AttributeError)) 1070 PyErr_SetString(PyExc_TypeError, 1071 "file must have a 'write' attribute"); 1072 return -1; 1073 } 1074 1075 return 0; 1076 } 1077 1078 /* Returns the size of the input on success, -1 on failure. This takes its 1079 own reference to `input`. */ 1080 static Py_ssize_t 1081 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input) 1082 { 1083 if (self->buffer.buf != NULL) 1084 PyBuffer_Release(&self->buffer); 1085 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0) 1086 return -1; 1087 self->input_buffer = self->buffer.buf; 1088 self->input_len = self->buffer.len; 1089 self->next_read_idx = 0; 1090 self->prefetched_idx = self->input_len; 1091 return self->input_len; 1092 } 1093 1094 static int 1095 bad_readline(void) 1096 { 1097 PickleState *st = _Pickle_GetGlobalState(); 1098 PyErr_SetString(st->UnpicklingError, "pickle data was truncated"); 1099 return -1; 1100 } 1101 1102 static int 1103 _Unpickler_SkipConsumed(UnpicklerObject *self) 1104 { 1105 Py_ssize_t consumed; 1106 PyObject *r; 1107 1108 consumed = self->next_read_idx - self->prefetched_idx; 1109 if (consumed <= 0) 1110 return 0; 1111 1112 assert(self->peek); /* otherwise we did something wrong */ 1113 /* This makes a useless copy... */ 1114 r = PyObject_CallFunction(self->read, "n", consumed); 1115 if (r == NULL) 1116 return -1; 1117 Py_DECREF(r); 1118 1119 self->prefetched_idx = self->next_read_idx; 1120 return 0; 1121 } 1122 1123 static const Py_ssize_t READ_WHOLE_LINE = -1; 1124 1125 /* If reading from a file, we need to only pull the bytes we need, since there 1126 may be multiple pickle objects arranged contiguously in the same input 1127 buffer. 
1128 1129 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n` 1130 bytes from the input stream/buffer. 1131 1132 Update the unpickler's input buffer with the newly-read data. Returns -1 on 1133 failure; on success, returns the number of bytes read from the file. 1134 1135 On success, self->input_len will be 0; this is intentional so that when 1136 unpickling from a file, the "we've run out of data" code paths will trigger, 1137 causing the Unpickler to go back to the file for more data. Use the returned 1138 size to tell you how much data you can process. */ 1139 static Py_ssize_t 1140 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n) 1141 { 1142 PyObject *data; 1143 Py_ssize_t read_size; 1144 1145 assert(self->read != NULL); 1146 1147 if (_Unpickler_SkipConsumed(self) < 0) 1148 return -1; 1149 1150 if (n == READ_WHOLE_LINE) { 1151 data = _PyObject_CallNoArg(self->readline); 1152 } 1153 else { 1154 PyObject *len; 1155 /* Prefetch some data without advancing the file pointer, if possible */ 1156 if (self->peek && n < PREFETCH) { 1157 len = PyLong_FromSsize_t(PREFETCH); 1158 if (len == NULL) 1159 return -1; 1160 data = _Pickle_FastCall(self->peek, len); 1161 if (data == NULL) { 1162 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError)) 1163 return -1; 1164 /* peek() is probably not supported by the given file object */ 1165 PyErr_Clear(); 1166 Py_CLEAR(self->peek); 1167 } 1168 else { 1169 read_size = _Unpickler_SetStringInput(self, data); 1170 Py_DECREF(data); 1171 self->prefetched_idx = 0; 1172 if (n <= read_size) 1173 return n; 1174 } 1175 } 1176 len = PyLong_FromSsize_t(n); 1177 if (len == NULL) 1178 return -1; 1179 data = _Pickle_FastCall(self->read, len); 1180 } 1181 if (data == NULL) 1182 return -1; 1183 1184 read_size = _Unpickler_SetStringInput(self, data); 1185 Py_DECREF(data); 1186 return read_size; 1187 } 1188 1189 /* Don't call it directly: use _Unpickler_Read() */ 1190 static Py_ssize_t 1191 
_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n) 1192 { 1193 Py_ssize_t num_read; 1194 1195 *s = NULL; 1196 if (self->next_read_idx > PY_SSIZE_T_MAX - n) { 1197 PickleState *st = _Pickle_GetGlobalState(); 1198 PyErr_SetString(st->UnpicklingError, 1199 "read would overflow (invalid bytecode)"); 1200 return -1; 1201 } 1202 1203 /* This case is handled by the _Unpickler_Read() macro for efficiency */ 1204 assert(self->next_read_idx + n > self->input_len); 1205 1206 if (!self->read) 1207 return bad_readline(); 1208 1209 num_read = _Unpickler_ReadFromFile(self, n); 1210 if (num_read < 0) 1211 return -1; 1212 if (num_read < n) 1213 return bad_readline(); 1214 *s = self->input_buffer; 1215 self->next_read_idx = n; 1216 return n; 1217 } 1218 1219 /* Read `n` bytes from the unpickler's data source, storing the result in `*s`. 1220 1221 This should be used for all data reads, rather than accessing the unpickler's 1222 input buffer directly. This method deals correctly with reading from input 1223 streams, which the input buffer doesn't deal with. 1224 1225 Note that when reading from a file-like object, self->next_read_idx won't 1226 be updated (it should remain at 0 for the entire unpickling process). You 1227 should use this function's return value to know how many bytes you can 1228 consume. 1229 1230 Returns -1 (with an exception set) on failure. On success, return the 1231 number of chars read. */ 1232 #define _Unpickler_Read(self, s, n) \ 1233 (((n) <= (self)->input_len - (self)->next_read_idx) \ 1234 ? 
(*(s) = (self)->input_buffer + (self)->next_read_idx, \ 1235 (self)->next_read_idx += (n), \ 1236 (n)) \ 1237 : _Unpickler_ReadImpl(self, (s), (n))) 1238 1239 static Py_ssize_t 1240 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len, 1241 char **result) 1242 { 1243 char *input_line = PyMem_Realloc(self->input_line, len + 1); 1244 if (input_line == NULL) { 1245 PyErr_NoMemory(); 1246 return -1; 1247 } 1248 1249 memcpy(input_line, line, len); 1250 input_line[len] = '\0'; 1251 self->input_line = input_line; 1252 *result = self->input_line; 1253 return len; 1254 } 1255 1256 /* Read a line from the input stream/buffer. If we run off the end of the input 1257 before hitting \n, raise an error. 1258 1259 Returns the number of chars read, or -1 on failure. */ 1260 static Py_ssize_t 1261 _Unpickler_Readline(UnpicklerObject *self, char **result) 1262 { 1263 Py_ssize_t i, num_read; 1264 1265 for (i = self->next_read_idx; i < self->input_len; i++) { 1266 if (self->input_buffer[i] == '\n') { 1267 char *line_start = self->input_buffer + self->next_read_idx; 1268 num_read = i - self->next_read_idx + 1; 1269 self->next_read_idx = i + 1; 1270 return _Unpickler_CopyLine(self, line_start, num_read, result); 1271 } 1272 } 1273 if (!self->read) 1274 return bad_readline(); 1275 1276 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE); 1277 if (num_read < 0) 1278 return -1; 1279 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n') 1280 return bad_readline(); 1281 self->next_read_idx = num_read; 1282 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result); 1283 } 1284 1285 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array 1286 will be modified in place. 
*/ 1287 static int 1288 _Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size) 1289 { 1290 Py_ssize_t i; 1291 1292 assert(new_size > self->memo_size); 1293 1294 PyMem_RESIZE(self->memo, PyObject *, new_size); 1295 if (self->memo == NULL) { 1296 PyErr_NoMemory(); 1297 return -1; 1298 } 1299 for (i = self->memo_size; i < new_size; i++) 1300 self->memo[i] = NULL; 1301 self->memo_size = new_size; 1302 return 0; 1303 } 1304 1305 /* Returns NULL if idx is out of bounds. */ 1306 static PyObject * 1307 _Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx) 1308 { 1309 if (idx < 0 || idx >= self->memo_size) 1310 return NULL; 1311 1312 return self->memo[idx]; 1313 } 1314 1315 /* Returns -1 (with an exception set) on failure, 0 on success. 1316 This takes its own reference to `value`. */ 1317 static int 1318 _Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value) 1319 { 1320 PyObject *old_item; 1321 1322 if (idx >= self->memo_size) { 1323 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0) 1324 return -1; 1325 assert(idx < self->memo_size); 1326 } 1327 Py_INCREF(value); 1328 old_item = self->memo[idx]; 1329 self->memo[idx] = value; 1330 if (old_item != NULL) { 1331 Py_DECREF(old_item); 1332 } 1333 else { 1334 self->memo_len++; 1335 } 1336 return 0; 1337 } 1338 1339 static PyObject ** 1340 _Unpickler_NewMemo(Py_ssize_t new_size) 1341 { 1342 PyObject **memo = PyMem_NEW(PyObject *, new_size); 1343 if (memo == NULL) { 1344 PyErr_NoMemory(); 1345 return NULL; 1346 } 1347 memset(memo, 0, new_size * sizeof(PyObject *)); 1348 return memo; 1349 } 1350 1351 /* Free the unpickler's memo, taking care to decref any items left in it. 
*/ 1352 static void 1353 _Unpickler_MemoCleanup(UnpicklerObject *self) 1354 { 1355 Py_ssize_t i; 1356 PyObject **memo = self->memo; 1357 1358 if (self->memo == NULL) 1359 return; 1360 self->memo = NULL; 1361 i = self->memo_size; 1362 while (--i >= 0) { 1363 Py_XDECREF(memo[i]); 1364 } 1365 PyMem_FREE(memo); 1366 } 1367 1368 static UnpicklerObject * 1369 _Unpickler_New(void) 1370 { 1371 UnpicklerObject *self; 1372 1373 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type); 1374 if (self == NULL) 1375 return NULL; 1376 1377 self->pers_func = NULL; 1378 self->input_buffer = NULL; 1379 self->input_line = NULL; 1380 self->input_len = 0; 1381 self->next_read_idx = 0; 1382 self->prefetched_idx = 0; 1383 self->read = NULL; 1384 self->readline = NULL; 1385 self->peek = NULL; 1386 self->encoding = NULL; 1387 self->errors = NULL; 1388 self->marks = NULL; 1389 self->num_marks = 0; 1390 self->marks_size = 0; 1391 self->proto = 0; 1392 self->fix_imports = 0; 1393 memset(&self->buffer, 0, sizeof(Py_buffer)); 1394 self->memo_size = 32; 1395 self->memo_len = 0; 1396 self->memo = _Unpickler_NewMemo(self->memo_size); 1397 self->stack = (Pdata *)Pdata_New(); 1398 1399 if (self->memo == NULL || self->stack == NULL) { 1400 Py_DECREF(self); 1401 return NULL; 1402 } 1403 1404 return self; 1405 } 1406 1407 /* Returns -1 (with an exception set) on failure, 0 on success. This may 1408 be called once on a freshly created Pickler. 
*/ 1409 static int 1410 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file) 1411 { 1412 _Py_IDENTIFIER(peek); 1413 _Py_IDENTIFIER(read); 1414 _Py_IDENTIFIER(readline); 1415 1416 self->peek = _PyObject_GetAttrId(file, &PyId_peek); 1417 if (self->peek == NULL) { 1418 if (PyErr_ExceptionMatches(PyExc_AttributeError)) 1419 PyErr_Clear(); 1420 else 1421 return -1; 1422 } 1423 self->read = _PyObject_GetAttrId(file, &PyId_read); 1424 self->readline = _PyObject_GetAttrId(file, &PyId_readline); 1425 if (self->readline == NULL || self->read == NULL) { 1426 if (PyErr_ExceptionMatches(PyExc_AttributeError)) 1427 PyErr_SetString(PyExc_TypeError, 1428 "file must have 'read' and 'readline' attributes"); 1429 Py_CLEAR(self->read); 1430 Py_CLEAR(self->readline); 1431 Py_CLEAR(self->peek); 1432 return -1; 1433 } 1434 return 0; 1435 } 1436 1437 /* Returns -1 (with an exception set) on failure, 0 on success. This may 1438 be called once on a freshly created Pickler. */ 1439 static int 1440 _Unpickler_SetInputEncoding(UnpicklerObject *self, 1441 const char *encoding, 1442 const char *errors) 1443 { 1444 if (encoding == NULL) 1445 encoding = "ASCII"; 1446 if (errors == NULL) 1447 errors = "strict"; 1448 1449 self->encoding = _PyMem_Strdup(encoding); 1450 self->errors = _PyMem_Strdup(errors); 1451 if (self->encoding == NULL || self->errors == NULL) { 1452 PyErr_NoMemory(); 1453 return -1; 1454 } 1455 return 0; 1456 } 1457 1458 /* Generate a GET opcode for an object stored in the memo. 
*/
static int
memo_get(PicklerObject *self, PyObject *key)
{
    char opbuf[30];
    Py_ssize_t oplen;
    Py_ssize_t memo_id;
    Py_ssize_t *entry = PyMemoTable_Get(self->memo, key);

    if (entry == NULL) {
        PyErr_SetObject(PyExc_KeyError, key);
        return -1;
    }
    memo_id = *entry;

    if (self->bin) {
        if (memo_id < 256) {
            /* One-byte index. */
            opbuf[0] = BINGET;
            opbuf[1] = (unsigned char)(memo_id & 0xff);
            oplen = 2;
        }
        else if ((size_t)memo_id <= 0xffffffffUL) {
            /* Four-byte index, least significant byte first. */
            opbuf[0] = LONG_BINGET;
            opbuf[1] = (unsigned char)(memo_id & 0xff);
            opbuf[2] = (unsigned char)((memo_id >> 8) & 0xff);
            opbuf[3] = (unsigned char)((memo_id >> 16) & 0xff);
            opbuf[4] = (unsigned char)((memo_id >> 24) & 0xff);
            oplen = 5;
        }
        else { /* unlikely */
            PickleState *state = _Pickle_GetGlobalState();
            PyErr_SetString(state->PicklingError,
                            "memo id too large for LONG_BINGET");
            return -1;
        }
    }
    else {
        /* Text form: opcode, decimal index, newline. */
        opbuf[0] = GET;
        PyOS_snprintf(opbuf + 1, sizeof(opbuf) - 1,
                      "%" PY_FORMAT_SIZE_T "d\n", memo_id);
        oplen = strlen(opbuf);
    }

    return (_Pickler_Write(self, opbuf, oplen) < 0) ? -1 : 0;
}

/* Store an object in the memo, assign it a new unique ID based on the number
   of objects currently stored in the memo and generate a PUT opcode.
*/ 1508 static int 1509 memo_put(PicklerObject *self, PyObject *obj) 1510 { 1511 char pdata[30]; 1512 Py_ssize_t len; 1513 Py_ssize_t idx; 1514 1515 const char memoize_op = MEMOIZE; 1516 1517 if (self->fast) 1518 return 0; 1519 1520 idx = PyMemoTable_Size(self->memo); 1521 if (PyMemoTable_Set(self->memo, obj, idx) < 0) 1522 return -1; 1523 1524 if (self->proto >= 4) { 1525 if (_Pickler_Write(self, &memoize_op, 1) < 0) 1526 return -1; 1527 return 0; 1528 } 1529 else if (!self->bin) { 1530 pdata[0] = PUT; 1531 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, 1532 "%" PY_FORMAT_SIZE_T "d\n", idx); 1533 len = strlen(pdata); 1534 } 1535 else { 1536 if (idx < 256) { 1537 pdata[0] = BINPUT; 1538 pdata[1] = (unsigned char)idx; 1539 len = 2; 1540 } 1541 else if ((size_t)idx <= 0xffffffffUL) { 1542 pdata[0] = LONG_BINPUT; 1543 pdata[1] = (unsigned char)(idx & 0xff); 1544 pdata[2] = (unsigned char)((idx >> 8) & 0xff); 1545 pdata[3] = (unsigned char)((idx >> 16) & 0xff); 1546 pdata[4] = (unsigned char)((idx >> 24) & 0xff); 1547 len = 5; 1548 } 1549 else { /* unlikely */ 1550 PickleState *st = _Pickle_GetGlobalState(); 1551 PyErr_SetString(st->PicklingError, 1552 "memo id too large for LONG_BINPUT"); 1553 return -1; 1554 } 1555 } 1556 if (_Pickler_Write(self, pdata, len) < 0) 1557 return -1; 1558 1559 return 0; 1560 } 1561 1562 static PyObject * 1563 get_dotted_path(PyObject *obj, PyObject *name) 1564 { 1565 _Py_static_string(PyId_dot, "."); 1566 PyObject *dotted_path; 1567 Py_ssize_t i, n; 1568 1569 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1); 1570 if (dotted_path == NULL) 1571 return NULL; 1572 n = PyList_GET_SIZE(dotted_path); 1573 assert(n >= 1); 1574 for (i = 0; i < n; i++) { 1575 PyObject *subpath = PyList_GET_ITEM(dotted_path, i); 1576 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) { 1577 if (obj == NULL) 1578 PyErr_Format(PyExc_AttributeError, 1579 "Can't pickle local object %R", name); 1580 else 1581 PyErr_Format(PyExc_AttributeError, 1582 
"Can't pickle local attribute %R on %R", name, obj); 1583 Py_DECREF(dotted_path); 1584 return NULL; 1585 } 1586 } 1587 return dotted_path; 1588 } 1589 1590 static PyObject * 1591 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent) 1592 { 1593 Py_ssize_t i, n; 1594 PyObject *parent = NULL; 1595 1596 assert(PyList_CheckExact(names)); 1597 Py_INCREF(obj); 1598 n = PyList_GET_SIZE(names); 1599 for (i = 0; i < n; i++) { 1600 PyObject *name = PyList_GET_ITEM(names, i); 1601 Py_XDECREF(parent); 1602 parent = obj; 1603 obj = PyObject_GetAttr(parent, name); 1604 if (obj == NULL) { 1605 Py_DECREF(parent); 1606 return NULL; 1607 } 1608 } 1609 if (pparent != NULL) 1610 *pparent = parent; 1611 else 1612 Py_XDECREF(parent); 1613 return obj; 1614 } 1615 1616 static void 1617 reformat_attribute_error(PyObject *obj, PyObject *name) 1618 { 1619 if (PyErr_ExceptionMatches(PyExc_AttributeError)) { 1620 PyErr_Clear(); 1621 PyErr_Format(PyExc_AttributeError, 1622 "Can't get attribute %R on %R", name, obj); 1623 } 1624 } 1625 1626 1627 static PyObject * 1628 getattribute(PyObject *obj, PyObject *name, int allow_qualname) 1629 { 1630 PyObject *dotted_path, *attr; 1631 1632 if (allow_qualname) { 1633 dotted_path = get_dotted_path(obj, name); 1634 if (dotted_path == NULL) 1635 return NULL; 1636 attr = get_deep_attribute(obj, dotted_path, NULL); 1637 Py_DECREF(dotted_path); 1638 } 1639 else 1640 attr = PyObject_GetAttr(obj, name); 1641 if (attr == NULL) 1642 reformat_attribute_error(obj, name); 1643 return attr; 1644 } 1645 1646 static PyObject * 1647 whichmodule(PyObject *global, PyObject *dotted_path) 1648 { 1649 PyObject *module_name; 1650 PyObject *modules_dict; 1651 PyObject *module; 1652 Py_ssize_t i; 1653 _Py_IDENTIFIER(__module__); 1654 _Py_IDENTIFIER(modules); 1655 _Py_IDENTIFIER(__main__); 1656 1657 module_name = _PyObject_GetAttrId(global, &PyId___module__); 1658 1659 if (module_name == NULL) { 1660 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) 1661 return 
NULL; 1662 PyErr_Clear(); 1663 } 1664 else { 1665 /* In some rare cases (e.g., bound methods of extension types), 1666 __module__ can be None. If it is so, then search sys.modules for 1667 the module of global. */ 1668 if (module_name != Py_None) 1669 return module_name; 1670 Py_CLEAR(module_name); 1671 } 1672 assert(module_name == NULL); 1673 1674 /* Fallback on walking sys.modules */ 1675 modules_dict = _PySys_GetObjectId(&PyId_modules); 1676 if (modules_dict == NULL) { 1677 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); 1678 return NULL; 1679 } 1680 1681 i = 0; 1682 while (PyDict_Next(modules_dict, &i, &module_name, &module)) { 1683 PyObject *candidate; 1684 if (PyUnicode_Check(module_name) && 1685 _PyUnicode_EqualToASCIIString(module_name, "__main__")) 1686 continue; 1687 if (module == Py_None) 1688 continue; 1689 1690 candidate = get_deep_attribute(module, dotted_path, NULL); 1691 if (candidate == NULL) { 1692 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) 1693 return NULL; 1694 PyErr_Clear(); 1695 continue; 1696 } 1697 1698 if (candidate == global) { 1699 Py_INCREF(module_name); 1700 Py_DECREF(candidate); 1701 return module_name; 1702 } 1703 Py_DECREF(candidate); 1704 } 1705 1706 /* If no module is found, use __main__. */ 1707 module_name = _PyUnicode_FromId(&PyId___main__); 1708 Py_INCREF(module_name); 1709 return module_name; 1710 } 1711 1712 /* fast_save_enter() and fast_save_leave() are guards against recursive 1713 objects when Pickler is used with the "fast mode" (i.e., with object 1714 memoization disabled). If the nesting of a list or dict object exceed 1715 FAST_NESTING_LIMIT, these guards will start keeping an internal 1716 reference to the seen list or dict objects and check whether these objects 1717 are recursive. These are not strictly necessary, since save() has a 1718 hard-coded recursion limit, but they give a nicer error message than the 1719 typical RuntimeError. 
*/ 1720 static int 1721 fast_save_enter(PicklerObject *self, PyObject *obj) 1722 { 1723 /* if fast_nesting < 0, we're doing an error exit. */ 1724 if (++self->fast_nesting >= FAST_NESTING_LIMIT) { 1725 PyObject *key = NULL; 1726 if (self->fast_memo == NULL) { 1727 self->fast_memo = PyDict_New(); 1728 if (self->fast_memo == NULL) { 1729 self->fast_nesting = -1; 1730 return 0; 1731 } 1732 } 1733 key = PyLong_FromVoidPtr(obj); 1734 if (key == NULL) 1735 return 0; 1736 if (PyDict_GetItemWithError(self->fast_memo, key)) { 1737 Py_DECREF(key); 1738 PyErr_Format(PyExc_ValueError, 1739 "fast mode: can't pickle cyclic objects " 1740 "including object type %.200s at %p", 1741 obj->ob_type->tp_name, obj); 1742 self->fast_nesting = -1; 1743 return 0; 1744 } 1745 if (PyErr_Occurred()) { 1746 return 0; 1747 } 1748 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) { 1749 Py_DECREF(key); 1750 self->fast_nesting = -1; 1751 return 0; 1752 } 1753 Py_DECREF(key); 1754 } 1755 return 1; 1756 } 1757 1758 static int 1759 fast_save_leave(PicklerObject *self, PyObject *obj) 1760 { 1761 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) { 1762 PyObject *key = PyLong_FromVoidPtr(obj); 1763 if (key == NULL) 1764 return 0; 1765 if (PyDict_DelItem(self->fast_memo, key) < 0) { 1766 Py_DECREF(key); 1767 return 0; 1768 } 1769 Py_DECREF(key); 1770 } 1771 return 1; 1772 } 1773 1774 static int 1775 save_none(PicklerObject *self, PyObject *obj) 1776 { 1777 const char none_op = NONE; 1778 if (_Pickler_Write(self, &none_op, 1) < 0) 1779 return -1; 1780 1781 return 0; 1782 } 1783 1784 static int 1785 save_bool(PicklerObject *self, PyObject *obj) 1786 { 1787 if (self->proto >= 2) { 1788 const char bool_op = (obj == Py_True) ? 
NEWTRUE : NEWFALSE; 1789 if (_Pickler_Write(self, &bool_op, 1) < 0) 1790 return -1; 1791 } 1792 else { 1793 /* These aren't opcodes -- they're ways to pickle bools before protocol 2 1794 * so that unpicklers written before bools were introduced unpickle them 1795 * as ints, but unpicklers after can recognize that bools were intended. 1796 * Note that protocol 2 added direct ways to pickle bools. 1797 */ 1798 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n"; 1799 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0) 1800 return -1; 1801 } 1802 return 0; 1803 } 1804 1805 static int 1806 save_long(PicklerObject *self, PyObject *obj) 1807 { 1808 PyObject *repr = NULL; 1809 Py_ssize_t size; 1810 long val; 1811 int status = 0; 1812 1813 const char long_op = LONG; 1814 1815 val= PyLong_AsLong(obj); 1816 if (val == -1 && PyErr_Occurred()) { 1817 /* out of range for int pickling */ 1818 PyErr_Clear(); 1819 } 1820 else if (self->bin && 1821 (sizeof(long) <= 4 || 1822 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) { 1823 /* result fits in a signed 4-byte integer. 1824 1825 Note: we can't use -0x80000000L in the above condition because some 1826 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type 1827 before applying the unary minus when sizeof(long) <= 4. The 1828 resulting value stays unsigned which is commonly not what we want, 1829 so MSVC happily warns us about it. However, that result would have 1830 been fine because we guard for sizeof(long) <= 4 which turns the 1831 condition true in that particular case. 
*/ 1832 char pdata[32]; 1833 Py_ssize_t len = 0; 1834 1835 pdata[1] = (unsigned char)(val & 0xff); 1836 pdata[2] = (unsigned char)((val >> 8) & 0xff); 1837 pdata[3] = (unsigned char)((val >> 16) & 0xff); 1838 pdata[4] = (unsigned char)((val >> 24) & 0xff); 1839 1840 if ((pdata[4] == 0) && (pdata[3] == 0)) { 1841 if (pdata[2] == 0) { 1842 pdata[0] = BININT1; 1843 len = 2; 1844 } 1845 else { 1846 pdata[0] = BININT2; 1847 len = 3; 1848 } 1849 } 1850 else { 1851 pdata[0] = BININT; 1852 len = 5; 1853 } 1854 1855 if (_Pickler_Write(self, pdata, len) < 0) 1856 return -1; 1857 1858 return 0; 1859 } 1860 1861 if (self->proto >= 2) { 1862 /* Linear-time pickling. */ 1863 size_t nbits; 1864 size_t nbytes; 1865 unsigned char *pdata; 1866 char header[5]; 1867 int i; 1868 int sign = _PyLong_Sign(obj); 1869 1870 if (sign == 0) { 1871 header[0] = LONG1; 1872 header[1] = 0; /* It's 0 -- an empty bytestring. */ 1873 if (_Pickler_Write(self, header, 2) < 0) 1874 goto error; 1875 return 0; 1876 } 1877 nbits = _PyLong_NumBits(obj); 1878 if (nbits == (size_t)-1 && PyErr_Occurred()) 1879 goto error; 1880 /* How many bytes do we need? There are nbits >> 3 full 1881 * bytes of data, and nbits & 7 leftover bits. If there 1882 * are any leftover bits, then we clearly need another 1883 * byte. Wnat's not so obvious is that we *probably* 1884 * need another byte even if there aren't any leftovers: 1885 * the most-significant bit of the most-significant byte 1886 * acts like a sign bit, and it's usually got a sense 1887 * opposite of the one we need. The exception is ints 1888 * of the form -(2**(8*j-1)) for j > 0. Such an int is 1889 * its own 256's-complement, so has the right sign bit 1890 * even without the extra byte. That's a pain to check 1891 * for in advance, though, so we always grab an extra 1892 * byte at the start, and cut it back later if possible. 
1893 */ 1894 nbytes = (nbits >> 3) + 1; 1895 if (nbytes > 0x7fffffffL) { 1896 PyErr_SetString(PyExc_OverflowError, 1897 "int too large to pickle"); 1898 goto error; 1899 } 1900 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes); 1901 if (repr == NULL) 1902 goto error; 1903 pdata = (unsigned char *)PyBytes_AS_STRING(repr); 1904 i = _PyLong_AsByteArray((PyLongObject *)obj, 1905 pdata, nbytes, 1906 1 /* little endian */ , 1 /* signed */ ); 1907 if (i < 0) 1908 goto error; 1909 /* If the int is negative, this may be a byte more than 1910 * needed. This is so iff the MSB is all redundant sign 1911 * bits. 1912 */ 1913 if (sign < 0 && 1914 nbytes > 1 && 1915 pdata[nbytes - 1] == 0xff && 1916 (pdata[nbytes - 2] & 0x80) != 0) { 1917 nbytes--; 1918 } 1919 1920 if (nbytes < 256) { 1921 header[0] = LONG1; 1922 header[1] = (unsigned char)nbytes; 1923 size = 2; 1924 } 1925 else { 1926 header[0] = LONG4; 1927 size = (Py_ssize_t) nbytes; 1928 for (i = 1; i < 5; i++) { 1929 header[i] = (unsigned char)(size & 0xff); 1930 size >>= 8; 1931 } 1932 size = 5; 1933 } 1934 if (_Pickler_Write(self, header, size) < 0 || 1935 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0) 1936 goto error; 1937 } 1938 else { 1939 char *string; 1940 1941 /* proto < 2: write the repr and newline. This is quadratic-time (in 1942 the number of digits), in both directions. We add a trailing 'L' 1943 to the repr, for compatibility with Python 2.x. 
*/ 1944 1945 repr = PyObject_Repr(obj); 1946 if (repr == NULL) 1947 goto error; 1948 1949 string = PyUnicode_AsUTF8AndSize(repr, &size); 1950 if (string == NULL) 1951 goto error; 1952 1953 if (_Pickler_Write(self, &long_op, 1) < 0 || 1954 _Pickler_Write(self, string, size) < 0 || 1955 _Pickler_Write(self, "L\n", 2) < 0) 1956 goto error; 1957 } 1958 1959 if (0) { 1960 error: 1961 status = -1; 1962 } 1963 Py_XDECREF(repr); 1964 1965 return status; 1966 } 1967 1968 static int 1969 save_float(PicklerObject *self, PyObject *obj) 1970 { 1971 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj); 1972 1973 if (self->bin) { 1974 char pdata[9]; 1975 pdata[0] = BINFLOAT; 1976 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0) 1977 return -1; 1978 if (_Pickler_Write(self, pdata, 9) < 0) 1979 return -1; 1980 } 1981 else { 1982 int result = -1; 1983 char *buf = NULL; 1984 char op = FLOAT; 1985 1986 if (_Pickler_Write(self, &op, 1) < 0) 1987 goto done; 1988 1989 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL); 1990 if (!buf) { 1991 PyErr_NoMemory(); 1992 goto done; 1993 } 1994 1995 if (_Pickler_Write(self, buf, strlen(buf)) < 0) 1996 goto done; 1997 1998 if (_Pickler_Write(self, "\n", 1) < 0) 1999 goto done; 2000 2001 result = 0; 2002 done: 2003 PyMem_Free(buf); 2004 return result; 2005 } 2006 2007 return 0; 2008 } 2009 2010 static int 2011 save_bytes(PicklerObject *self, PyObject *obj) 2012 { 2013 if (self->proto < 3) { 2014 /* Older pickle protocols do not have an opcode for pickling bytes 2015 objects. Therefore, we need to fake the copy protocol (i.e., 2016 the __reduce__ method) to permit bytes object unpickling. 2017 2018 Here we use a hack to be compatible with Python 2. 
Since in Python 2019 2 'bytes' is just an alias for 'str' (which has different 2020 parameters than the actual bytes object), we use codecs.encode 2021 to create the appropriate 'str' object when unpickled using 2022 Python 2 *and* the appropriate 'bytes' object when unpickled 2023 using Python 3. Again this is a hack and we don't need to do this 2024 with newer protocols. */ 2025 PyObject *reduce_value = NULL; 2026 int status; 2027 2028 if (PyBytes_GET_SIZE(obj) == 0) { 2029 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type); 2030 } 2031 else { 2032 PickleState *st = _Pickle_GetGlobalState(); 2033 PyObject *unicode_str = 2034 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj), 2035 PyBytes_GET_SIZE(obj), 2036 "strict"); 2037 _Py_IDENTIFIER(latin1); 2038 2039 if (unicode_str == NULL) 2040 return -1; 2041 reduce_value = Py_BuildValue("(O(OO))", 2042 st->codecs_encode, unicode_str, 2043 _PyUnicode_FromId(&PyId_latin1)); 2044 Py_DECREF(unicode_str); 2045 } 2046 2047 if (reduce_value == NULL) 2048 return -1; 2049 2050 /* save_reduce() will memoize the object automatically. 
*/ 2051 status = save_reduce(self, reduce_value, obj); 2052 Py_DECREF(reduce_value); 2053 return status; 2054 } 2055 else { 2056 Py_ssize_t size; 2057 char header[9]; 2058 Py_ssize_t len; 2059 2060 size = PyBytes_GET_SIZE(obj); 2061 if (size < 0) 2062 return -1; 2063 2064 if (size <= 0xff) { 2065 header[0] = SHORT_BINBYTES; 2066 header[1] = (unsigned char)size; 2067 len = 2; 2068 } 2069 else if ((size_t)size <= 0xffffffffUL) { 2070 header[0] = BINBYTES; 2071 header[1] = (unsigned char)(size & 0xff); 2072 header[2] = (unsigned char)((size >> 8) & 0xff); 2073 header[3] = (unsigned char)((size >> 16) & 0xff); 2074 header[4] = (unsigned char)((size >> 24) & 0xff); 2075 len = 5; 2076 } 2077 else if (self->proto >= 4) { 2078 header[0] = BINBYTES8; 2079 _write_size64(header + 1, size); 2080 len = 9; 2081 } 2082 else { 2083 PyErr_SetString(PyExc_OverflowError, 2084 "cannot serialize a bytes object larger than 4 GiB"); 2085 return -1; /* string too large */ 2086 } 2087 2088 if (_Pickler_Write(self, header, len) < 0) 2089 return -1; 2090 2091 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0) 2092 return -1; 2093 2094 if (memo_put(self, obj) < 0) 2095 return -1; 2096 2097 return 0; 2098 } 2099 } 2100 2101 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates 2102 backslash and newline characters to \uXXXX escapes. 
*/ 2103 static PyObject * 2104 raw_unicode_escape(PyObject *obj) 2105 { 2106 char *p; 2107 Py_ssize_t i, size; 2108 void *data; 2109 unsigned int kind; 2110 _PyBytesWriter writer; 2111 2112 if (PyUnicode_READY(obj)) 2113 return NULL; 2114 2115 _PyBytesWriter_Init(&writer); 2116 2117 size = PyUnicode_GET_LENGTH(obj); 2118 data = PyUnicode_DATA(obj); 2119 kind = PyUnicode_KIND(obj); 2120 2121 p = _PyBytesWriter_Alloc(&writer, size); 2122 if (p == NULL) 2123 goto error; 2124 writer.overallocate = 1; 2125 2126 for (i=0; i < size; i++) { 2127 Py_UCS4 ch = PyUnicode_READ(kind, data, i); 2128 /* Map 32-bit characters to '\Uxxxxxxxx' */ 2129 if (ch >= 0x10000) { 2130 /* -1: subtract 1 preallocated byte */ 2131 p = _PyBytesWriter_Prepare(&writer, p, 10-1); 2132 if (p == NULL) 2133 goto error; 2134 2135 *p++ = '\\'; 2136 *p++ = 'U'; 2137 *p++ = Py_hexdigits[(ch >> 28) & 0xf]; 2138 *p++ = Py_hexdigits[(ch >> 24) & 0xf]; 2139 *p++ = Py_hexdigits[(ch >> 20) & 0xf]; 2140 *p++ = Py_hexdigits[(ch >> 16) & 0xf]; 2141 *p++ = Py_hexdigits[(ch >> 12) & 0xf]; 2142 *p++ = Py_hexdigits[(ch >> 8) & 0xf]; 2143 *p++ = Py_hexdigits[(ch >> 4) & 0xf]; 2144 *p++ = Py_hexdigits[ch & 15]; 2145 } 2146 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */ 2147 else if (ch >= 256 || ch == '\\' || ch == '\n') { 2148 /* -1: subtract 1 preallocated byte */ 2149 p = _PyBytesWriter_Prepare(&writer, p, 6-1); 2150 if (p == NULL) 2151 goto error; 2152 2153 *p++ = '\\'; 2154 *p++ = 'u'; 2155 *p++ = Py_hexdigits[(ch >> 12) & 0xf]; 2156 *p++ = Py_hexdigits[(ch >> 8) & 0xf]; 2157 *p++ = Py_hexdigits[(ch >> 4) & 0xf]; 2158 *p++ = Py_hexdigits[ch & 15]; 2159 } 2160 /* Copy everything else as-is */ 2161 else 2162 *p++ = (char) ch; 2163 } 2164 2165 return _PyBytesWriter_Finish(&writer, p); 2166 2167 error: 2168 _PyBytesWriter_Dealloc(&writer); 2169 return NULL; 2170 } 2171 2172 static int 2173 write_utf8(PicklerObject *self, const char *data, Py_ssize_t size) 2174 { 2175 char header[9]; 2176 Py_ssize_t len; 2177 
2178 assert(size >= 0); 2179 if (size <= 0xff && self->proto >= 4) { 2180 header[0] = SHORT_BINUNICODE; 2181 header[1] = (unsigned char)(size & 0xff); 2182 len = 2; 2183 } 2184 else if ((size_t)size <= 0xffffffffUL) { 2185 header[0] = BINUNICODE; 2186 header[1] = (unsigned char)(size & 0xff); 2187 header[2] = (unsigned char)((size >> 8) & 0xff); 2188 header[3] = (unsigned char)((size >> 16) & 0xff); 2189 header[4] = (unsigned char)((size >> 24) & 0xff); 2190 len = 5; 2191 } 2192 else if (self->proto >= 4) { 2193 header[0] = BINUNICODE8; 2194 _write_size64(header + 1, size); 2195 len = 9; 2196 } 2197 else { 2198 PyErr_SetString(PyExc_OverflowError, 2199 "cannot serialize a string larger than 4GiB"); 2200 return -1; 2201 } 2202 2203 if (_Pickler_Write(self, header, len) < 0) 2204 return -1; 2205 if (_Pickler_Write(self, data, size) < 0) 2206 return -1; 2207 2208 return 0; 2209 } 2210 2211 static int 2212 write_unicode_binary(PicklerObject *self, PyObject *obj) 2213 { 2214 PyObject *encoded = NULL; 2215 Py_ssize_t size; 2216 char *data; 2217 int r; 2218 2219 if (PyUnicode_READY(obj)) 2220 return -1; 2221 2222 data = PyUnicode_AsUTF8AndSize(obj, &size); 2223 if (data != NULL) 2224 return write_utf8(self, data, size); 2225 2226 /* Issue #8383: for strings with lone surrogates, fallback on the 2227 "surrogatepass" error handler. 
*/ 2228 PyErr_Clear(); 2229 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); 2230 if (encoded == NULL) 2231 return -1; 2232 2233 r = write_utf8(self, PyBytes_AS_STRING(encoded), 2234 PyBytes_GET_SIZE(encoded)); 2235 Py_DECREF(encoded); 2236 return r; 2237 } 2238 2239 static int 2240 save_unicode(PicklerObject *self, PyObject *obj) 2241 { 2242 if (self->bin) { 2243 if (write_unicode_binary(self, obj) < 0) 2244 return -1; 2245 } 2246 else { 2247 PyObject *encoded; 2248 Py_ssize_t size; 2249 const char unicode_op = UNICODE; 2250 2251 encoded = raw_unicode_escape(obj); 2252 if (encoded == NULL) 2253 return -1; 2254 2255 if (_Pickler_Write(self, &unicode_op, 1) < 0) { 2256 Py_DECREF(encoded); 2257 return -1; 2258 } 2259 2260 size = PyBytes_GET_SIZE(encoded); 2261 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) { 2262 Py_DECREF(encoded); 2263 return -1; 2264 } 2265 Py_DECREF(encoded); 2266 2267 if (_Pickler_Write(self, "\n", 1) < 0) 2268 return -1; 2269 } 2270 if (memo_put(self, obj) < 0) 2271 return -1; 2272 2273 return 0; 2274 } 2275 2276 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */ 2277 static int 2278 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len) 2279 { 2280 Py_ssize_t i; 2281 2282 assert(PyTuple_Size(t) == len); 2283 2284 for (i = 0; i < len; i++) { 2285 PyObject *element = PyTuple_GET_ITEM(t, i); 2286 2287 if (element == NULL) 2288 return -1; 2289 if (save(self, element, 0) < 0) 2290 return -1; 2291 } 2292 2293 return 0; 2294 } 2295 2296 /* Tuples are ubiquitous in the pickle protocols, so many techniques are 2297 * used across protocols to minimize the space needed to pickle them. 2298 * Tuples are also the only builtin immutable type that can be recursive 2299 * (a tuple can be reached from itself), and that requires some subtle 2300 * magic so that it works in all cases. IOW, this is a long routine. 
2301 */ 2302 static int 2303 save_tuple(PicklerObject *self, PyObject *obj) 2304 { 2305 Py_ssize_t len, i; 2306 2307 const char mark_op = MARK; 2308 const char tuple_op = TUPLE; 2309 const char pop_op = POP; 2310 const char pop_mark_op = POP_MARK; 2311 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3}; 2312 2313 if ((len = PyTuple_Size(obj)) < 0) 2314 return -1; 2315 2316 if (len == 0) { 2317 char pdata[2]; 2318 2319 if (self->proto) { 2320 pdata[0] = EMPTY_TUPLE; 2321 len = 1; 2322 } 2323 else { 2324 pdata[0] = MARK; 2325 pdata[1] = TUPLE; 2326 len = 2; 2327 } 2328 if (_Pickler_Write(self, pdata, len) < 0) 2329 return -1; 2330 return 0; 2331 } 2332 2333 /* The tuple isn't in the memo now. If it shows up there after 2334 * saving the tuple elements, the tuple must be recursive, in 2335 * which case we'll pop everything we put on the stack, and fetch 2336 * its value from the memo. 2337 */ 2338 if (len <= 3 && self->proto >= 2) { 2339 /* Use TUPLE{1,2,3} opcodes. */ 2340 if (store_tuple_elements(self, obj, len) < 0) 2341 return -1; 2342 2343 if (PyMemoTable_Get(self->memo, obj)) { 2344 /* pop the len elements */ 2345 for (i = 0; i < len; i++) 2346 if (_Pickler_Write(self, &pop_op, 1) < 0) 2347 return -1; 2348 /* fetch from memo */ 2349 if (memo_get(self, obj) < 0) 2350 return -1; 2351 2352 return 0; 2353 } 2354 else { /* Not recursive. */ 2355 if (_Pickler_Write(self, len2opcode + len, 1) < 0) 2356 return -1; 2357 } 2358 goto memoize; 2359 } 2360 2361 /* proto < 2 and len > 0, or proto >= 2 and len > 3. 2362 * Generate MARK e1 e2 ... TUPLE 2363 */ 2364 if (_Pickler_Write(self, &mark_op, 1) < 0) 2365 return -1; 2366 2367 if (store_tuple_elements(self, obj, len) < 0) 2368 return -1; 2369 2370 if (PyMemoTable_Get(self->memo, obj)) { 2371 /* pop the stack stuff we pushed */ 2372 if (self->bin) { 2373 if (_Pickler_Write(self, &pop_mark_op, 1) < 0) 2374 return -1; 2375 } 2376 else { 2377 /* Note that we pop one more than len, to remove 2378 * the MARK too. 
2379 */ 2380 for (i = 0; i <= len; i++) 2381 if (_Pickler_Write(self, &pop_op, 1) < 0) 2382 return -1; 2383 } 2384 /* fetch from memo */ 2385 if (memo_get(self, obj) < 0) 2386 return -1; 2387 2388 return 0; 2389 } 2390 else { /* Not recursive. */ 2391 if (_Pickler_Write(self, &tuple_op, 1) < 0) 2392 return -1; 2393 } 2394 2395 memoize: 2396 if (memo_put(self, obj) < 0) 2397 return -1; 2398 2399 return 0; 2400 } 2401 2402 /* iter is an iterator giving items, and we batch up chunks of 2403 * MARK item item ... item APPENDS 2404 * opcode sequences. Calling code should have arranged to first create an 2405 * empty list, or list-like object, for the APPENDS to operate on. 2406 * Returns 0 on success, <0 on error. 2407 */ 2408 static int 2409 batch_list(PicklerObject *self, PyObject *iter) 2410 { 2411 PyObject *obj = NULL; 2412 PyObject *firstitem = NULL; 2413 int i, n; 2414 2415 const char mark_op = MARK; 2416 const char append_op = APPEND; 2417 const char appends_op = APPENDS; 2418 2419 assert(iter != NULL); 2420 2421 /* XXX: I think this function could be made faster by avoiding the 2422 iterator interface and fetching objects directly from list using 2423 PyList_GET_ITEM. 2424 */ 2425 2426 if (self->proto == 0) { 2427 /* APPENDS isn't available; do one at a time. */ 2428 for (;;) { 2429 obj = PyIter_Next(iter); 2430 if (obj == NULL) { 2431 if (PyErr_Occurred()) 2432 return -1; 2433 break; 2434 } 2435 i = save(self, obj, 0); 2436 Py_DECREF(obj); 2437 if (i < 0) 2438 return -1; 2439 if (_Pickler_Write(self, &append_op, 1) < 0) 2440 return -1; 2441 } 2442 return 0; 2443 } 2444 2445 /* proto > 0: write in batches of BATCHSIZE. 
*/ 2446 do { 2447 /* Get first item */ 2448 firstitem = PyIter_Next(iter); 2449 if (firstitem == NULL) { 2450 if (PyErr_Occurred()) 2451 goto error; 2452 2453 /* nothing more to add */ 2454 break; 2455 } 2456 2457 /* Try to get a second item */ 2458 obj = PyIter_Next(iter); 2459 if (obj == NULL) { 2460 if (PyErr_Occurred()) 2461 goto error; 2462 2463 /* Only one item to write */ 2464 if (save(self, firstitem, 0) < 0) 2465 goto error; 2466 if (_Pickler_Write(self, &append_op, 1) < 0) 2467 goto error; 2468 Py_CLEAR(firstitem); 2469 break; 2470 } 2471 2472 /* More than one item to write */ 2473 2474 /* Pump out MARK, items, APPENDS. */ 2475 if (_Pickler_Write(self, &mark_op, 1) < 0) 2476 goto error; 2477 2478 if (save(self, firstitem, 0) < 0) 2479 goto error; 2480 Py_CLEAR(firstitem); 2481 n = 1; 2482 2483 /* Fetch and save up to BATCHSIZE items */ 2484 while (obj) { 2485 if (save(self, obj, 0) < 0) 2486 goto error; 2487 Py_CLEAR(obj); 2488 n += 1; 2489 2490 if (n == BATCHSIZE) 2491 break; 2492 2493 obj = PyIter_Next(iter); 2494 if (obj == NULL) { 2495 if (PyErr_Occurred()) 2496 goto error; 2497 break; 2498 } 2499 } 2500 2501 if (_Pickler_Write(self, &appends_op, 1) < 0) 2502 goto error; 2503 2504 } while (n == BATCHSIZE); 2505 return 0; 2506 2507 error: 2508 Py_XDECREF(firstitem); 2509 Py_XDECREF(obj); 2510 return -1; 2511 } 2512 2513 /* This is a variant of batch_list() above, specialized for lists (with no 2514 * support for list subclasses). Like batch_list(), we batch up chunks of 2515 * MARK item item ... item APPENDS 2516 * opcode sequences. Calling code should have arranged to first create an 2517 * empty list, or list-like object, for the APPENDS to operate on. 2518 * Returns 0 on success, -1 on error. 2519 * 2520 * This version is considerably faster than batch_list(), if less general. 2521 * 2522 * Note that this only works for protocols > 0. 
2523 */ 2524 static int 2525 batch_list_exact(PicklerObject *self, PyObject *obj) 2526 { 2527 PyObject *item = NULL; 2528 Py_ssize_t this_batch, total; 2529 2530 const char append_op = APPEND; 2531 const char appends_op = APPENDS; 2532 const char mark_op = MARK; 2533 2534 assert(obj != NULL); 2535 assert(self->proto > 0); 2536 assert(PyList_CheckExact(obj)); 2537 2538 if (PyList_GET_SIZE(obj) == 1) { 2539 item = PyList_GET_ITEM(obj, 0); 2540 if (save(self, item, 0) < 0) 2541 return -1; 2542 if (_Pickler_Write(self, &append_op, 1) < 0) 2543 return -1; 2544 return 0; 2545 } 2546 2547 /* Write in batches of BATCHSIZE. */ 2548 total = 0; 2549 do { 2550 this_batch = 0; 2551 if (_Pickler_Write(self, &mark_op, 1) < 0) 2552 return -1; 2553 while (total < PyList_GET_SIZE(obj)) { 2554 item = PyList_GET_ITEM(obj, total); 2555 if (save(self, item, 0) < 0) 2556 return -1; 2557 total++; 2558 if (++this_batch == BATCHSIZE) 2559 break; 2560 } 2561 if (_Pickler_Write(self, &appends_op, 1) < 0) 2562 return -1; 2563 2564 } while (total < PyList_GET_SIZE(obj)); 2565 2566 return 0; 2567 } 2568 2569 static int 2570 save_list(PicklerObject *self, PyObject *obj) 2571 { 2572 char header[3]; 2573 Py_ssize_t len; 2574 int status = 0; 2575 2576 if (self->fast && !fast_save_enter(self, obj)) 2577 goto error; 2578 2579 /* Create an empty list. */ 2580 if (self->bin) { 2581 header[0] = EMPTY_LIST; 2582 len = 1; 2583 } 2584 else { 2585 header[0] = MARK; 2586 header[1] = LIST; 2587 len = 2; 2588 } 2589 2590 if (_Pickler_Write(self, header, len) < 0) 2591 goto error; 2592 2593 /* Get list length, and bow out early if empty. */ 2594 if ((len = PyList_Size(obj)) < 0) 2595 goto error; 2596 2597 if (memo_put(self, obj) < 0) 2598 goto error; 2599 2600 if (len != 0) { 2601 /* Materialize the list elements. 
*/ 2602 if (PyList_CheckExact(obj) && self->proto > 0) { 2603 if (Py_EnterRecursiveCall(" while pickling an object")) 2604 goto error; 2605 status = batch_list_exact(self, obj); 2606 Py_LeaveRecursiveCall(); 2607 } else { 2608 PyObject *iter = PyObject_GetIter(obj); 2609 if (iter == NULL) 2610 goto error; 2611 2612 if (Py_EnterRecursiveCall(" while pickling an object")) { 2613 Py_DECREF(iter); 2614 goto error; 2615 } 2616 status = batch_list(self, iter); 2617 Py_LeaveRecursiveCall(); 2618 Py_DECREF(iter); 2619 } 2620 } 2621 if (0) { 2622 error: 2623 status = -1; 2624 } 2625 2626 if (self->fast && !fast_save_leave(self, obj)) 2627 status = -1; 2628 2629 return status; 2630 } 2631 2632 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of 2633 * MARK key value ... key value SETITEMS 2634 * opcode sequences. Calling code should have arranged to first create an 2635 * empty dict, or dict-like object, for the SETITEMS to operate on. 2636 * Returns 0 on success, <0 on error. 2637 * 2638 * This is very much like batch_list(). The difference between saving 2639 * elements directly, and picking apart two-tuples, is so long-winded at 2640 * the C level, though, that attempts to combine these routines were too 2641 * ugly to bear. 2642 */ 2643 static int 2644 batch_dict(PicklerObject *self, PyObject *iter) 2645 { 2646 PyObject *obj = NULL; 2647 PyObject *firstitem = NULL; 2648 int i, n; 2649 2650 const char mark_op = MARK; 2651 const char setitem_op = SETITEM; 2652 const char setitems_op = SETITEMS; 2653 2654 assert(iter != NULL); 2655 2656 if (self->proto == 0) { 2657 /* SETITEMS isn't available; do one at a time. 
*/ 2658 for (;;) { 2659 obj = PyIter_Next(iter); 2660 if (obj == NULL) { 2661 if (PyErr_Occurred()) 2662 return -1; 2663 break; 2664 } 2665 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) { 2666 PyErr_SetString(PyExc_TypeError, "dict items " 2667 "iterator must return 2-tuples"); 2668 return -1; 2669 } 2670 i = save(self, PyTuple_GET_ITEM(obj, 0), 0); 2671 if (i >= 0) 2672 i = save(self, PyTuple_GET_ITEM(obj, 1), 0); 2673 Py_DECREF(obj); 2674 if (i < 0) 2675 return -1; 2676 if (_Pickler_Write(self, &setitem_op, 1) < 0) 2677 return -1; 2678 } 2679 return 0; 2680 } 2681 2682 /* proto > 0: write in batches of BATCHSIZE. */ 2683 do { 2684 /* Get first item */ 2685 firstitem = PyIter_Next(iter); 2686 if (firstitem == NULL) { 2687 if (PyErr_Occurred()) 2688 goto error; 2689 2690 /* nothing more to add */ 2691 break; 2692 } 2693 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) { 2694 PyErr_SetString(PyExc_TypeError, "dict items " 2695 "iterator must return 2-tuples"); 2696 goto error; 2697 } 2698 2699 /* Try to get a second item */ 2700 obj = PyIter_Next(iter); 2701 if (obj == NULL) { 2702 if (PyErr_Occurred()) 2703 goto error; 2704 2705 /* Only one item to write */ 2706 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0) 2707 goto error; 2708 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0) 2709 goto error; 2710 if (_Pickler_Write(self, &setitem_op, 1) < 0) 2711 goto error; 2712 Py_CLEAR(firstitem); 2713 break; 2714 } 2715 2716 /* More than one item to write */ 2717 2718 /* Pump out MARK, items, SETITEMS. 
*/ 2719 if (_Pickler_Write(self, &mark_op, 1) < 0) 2720 goto error; 2721 2722 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0) 2723 goto error; 2724 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0) 2725 goto error; 2726 Py_CLEAR(firstitem); 2727 n = 1; 2728 2729 /* Fetch and save up to BATCHSIZE items */ 2730 while (obj) { 2731 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) { 2732 PyErr_SetString(PyExc_TypeError, "dict items " 2733 "iterator must return 2-tuples"); 2734 goto error; 2735 } 2736 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 || 2737 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0) 2738 goto error; 2739 Py_CLEAR(obj); 2740 n += 1; 2741 2742 if (n == BATCHSIZE) 2743 break; 2744 2745 obj = PyIter_Next(iter); 2746 if (obj == NULL) { 2747 if (PyErr_Occurred()) 2748 goto error; 2749 break; 2750 } 2751 } 2752 2753 if (_Pickler_Write(self, &setitems_op, 1) < 0) 2754 goto error; 2755 2756 } while (n == BATCHSIZE); 2757 return 0; 2758 2759 error: 2760 Py_XDECREF(firstitem); 2761 Py_XDECREF(obj); 2762 return -1; 2763 } 2764 2765 /* This is a variant of batch_dict() above that specializes for dicts, with no 2766 * support for dict subclasses. Like batch_dict(), we batch up chunks of 2767 * MARK key value ... key value SETITEMS 2768 * opcode sequences. Calling code should have arranged to first create an 2769 * empty dict, or dict-like object, for the SETITEMS to operate on. 2770 * Returns 0 on success, -1 on error. 2771 * 2772 * Note that this currently doesn't work for protocol 0. 2773 */ 2774 static int 2775 batch_dict_exact(PicklerObject *self, PyObject *obj) 2776 { 2777 PyObject *key = NULL, *value = NULL; 2778 int i; 2779 Py_ssize_t dict_size, ppos = 0; 2780 2781 const char mark_op = MARK; 2782 const char setitem_op = SETITEM; 2783 const char setitems_op = SETITEMS; 2784 2785 assert(obj != NULL); 2786 assert(self->proto > 0); 2787 2788 dict_size = PyDict_Size(obj); 2789 2790 /* Special-case len(d) == 1 to save space. 
*/ 2791 if (dict_size == 1) { 2792 PyDict_Next(obj, &ppos, &key, &value); 2793 if (save(self, key, 0) < 0) 2794 return -1; 2795 if (save(self, value, 0) < 0) 2796 return -1; 2797 if (_Pickler_Write(self, &setitem_op, 1) < 0) 2798 return -1; 2799 return 0; 2800 } 2801 2802 /* Write in batches of BATCHSIZE. */ 2803 do { 2804 i = 0; 2805 if (_Pickler_Write(self, &mark_op, 1) < 0) 2806 return -1; 2807 while (PyDict_Next(obj, &ppos, &key, &value)) { 2808 if (save(self, key, 0) < 0) 2809 return -1; 2810 if (save(self, value, 0) < 0) 2811 return -1; 2812 if (++i == BATCHSIZE) 2813 break; 2814 } 2815 if (_Pickler_Write(self, &setitems_op, 1) < 0) 2816 return -1; 2817 if (PyDict_Size(obj) != dict_size) { 2818 PyErr_Format( 2819 PyExc_RuntimeError, 2820 "dictionary changed size during iteration"); 2821 return -1; 2822 } 2823 2824 } while (i == BATCHSIZE); 2825 return 0; 2826 } 2827 2828 static int 2829 save_dict(PicklerObject *self, PyObject *obj) 2830 { 2831 PyObject *items, *iter; 2832 char header[3]; 2833 Py_ssize_t len; 2834 int status = 0; 2835 2836 if (self->fast && !fast_save_enter(self, obj)) 2837 goto error; 2838 2839 /* Create an empty dict. */ 2840 if (self->bin) { 2841 header[0] = EMPTY_DICT; 2842 len = 1; 2843 } 2844 else { 2845 header[0] = MARK; 2846 header[1] = DICT; 2847 len = 2; 2848 } 2849 2850 if (_Pickler_Write(self, header, len) < 0) 2851 goto error; 2852 2853 /* Get dict size, and bow out early if empty. */ 2854 if ((len = PyDict_Size(obj)) < 0) 2855 goto error; 2856 2857 if (memo_put(self, obj) < 0) 2858 goto error; 2859 2860 if (len != 0) { 2861 /* Save the dict items. */ 2862 if (PyDict_CheckExact(obj) && self->proto > 0) { 2863 /* We can take certain shortcuts if we know this is a dict and 2864 not a dict subclass. 
*/ 2865 if (Py_EnterRecursiveCall(" while pickling an object")) 2866 goto error; 2867 status = batch_dict_exact(self, obj); 2868 Py_LeaveRecursiveCall(); 2869 } else { 2870 _Py_IDENTIFIER(items); 2871 2872 items = _PyObject_CallMethodId(obj, &PyId_items, NULL); 2873 if (items == NULL) 2874 goto error; 2875 iter = PyObject_GetIter(items); 2876 Py_DECREF(items); 2877 if (iter == NULL) 2878 goto error; 2879 if (Py_EnterRecursiveCall(" while pickling an object")) { 2880 Py_DECREF(iter); 2881 goto error; 2882 } 2883 status = batch_dict(self, iter); 2884 Py_LeaveRecursiveCall(); 2885 Py_DECREF(iter); 2886 } 2887 } 2888 2889 if (0) { 2890 error: 2891 status = -1; 2892 } 2893 2894 if (self->fast && !fast_save_leave(self, obj)) 2895 status = -1; 2896 2897 return status; 2898 } 2899 2900 static int 2901 save_set(PicklerObject *self, PyObject *obj) 2902 { 2903 PyObject *item; 2904 int i; 2905 Py_ssize_t set_size, ppos = 0; 2906 Py_hash_t hash; 2907 2908 const char empty_set_op = EMPTY_SET; 2909 const char mark_op = MARK; 2910 const char additems_op = ADDITEMS; 2911 2912 if (self->proto < 4) { 2913 PyObject *items; 2914 PyObject *reduce_value; 2915 int status; 2916 2917 items = PySequence_List(obj); 2918 if (items == NULL) { 2919 return -1; 2920 } 2921 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items); 2922 Py_DECREF(items); 2923 if (reduce_value == NULL) { 2924 return -1; 2925 } 2926 /* save_reduce() will memoize the object automatically. */ 2927 status = save_reduce(self, reduce_value, obj); 2928 Py_DECREF(reduce_value); 2929 return status; 2930 } 2931 2932 if (_Pickler_Write(self, &empty_set_op, 1) < 0) 2933 return -1; 2934 2935 if (memo_put(self, obj) < 0) 2936 return -1; 2937 2938 set_size = PySet_GET_SIZE(obj); 2939 if (set_size == 0) 2940 return 0; /* nothing to do */ 2941 2942 /* Write in batches of BATCHSIZE. 
*/ 2943 do { 2944 i = 0; 2945 if (_Pickler_Write(self, &mark_op, 1) < 0) 2946 return -1; 2947 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) { 2948 if (save(self, item, 0) < 0) 2949 return -1; 2950 if (++i == BATCHSIZE) 2951 break; 2952 } 2953 if (_Pickler_Write(self, &additems_op, 1) < 0) 2954 return -1; 2955 if (PySet_GET_SIZE(obj) != set_size) { 2956 PyErr_Format( 2957 PyExc_RuntimeError, 2958 "set changed size during iteration"); 2959 return -1; 2960 } 2961 } while (i == BATCHSIZE); 2962 2963 return 0; 2964 } 2965 2966 static int 2967 save_frozenset(PicklerObject *self, PyObject *obj) 2968 { 2969 PyObject *iter; 2970 2971 const char mark_op = MARK; 2972 const char frozenset_op = FROZENSET; 2973 2974 if (self->fast && !fast_save_enter(self, obj)) 2975 return -1; 2976 2977 if (self->proto < 4) { 2978 PyObject *items; 2979 PyObject *reduce_value; 2980 int status; 2981 2982 items = PySequence_List(obj); 2983 if (items == NULL) { 2984 return -1; 2985 } 2986 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type, 2987 items); 2988 Py_DECREF(items); 2989 if (reduce_value == NULL) { 2990 return -1; 2991 } 2992 /* save_reduce() will memoize the object automatically. */ 2993 status = save_reduce(self, reduce_value, obj); 2994 Py_DECREF(reduce_value); 2995 return status; 2996 } 2997 2998 if (_Pickler_Write(self, &mark_op, 1) < 0) 2999 return -1; 3000 3001 iter = PyObject_GetIter(obj); 3002 if (iter == NULL) { 3003 return -1; 3004 } 3005 for (;;) { 3006 PyObject *item; 3007 3008 item = PyIter_Next(iter); 3009 if (item == NULL) { 3010 if (PyErr_Occurred()) { 3011 Py_DECREF(iter); 3012 return -1; 3013 } 3014 break; 3015 } 3016 if (save(self, item, 0) < 0) { 3017 Py_DECREF(item); 3018 Py_DECREF(iter); 3019 return -1; 3020 } 3021 Py_DECREF(item); 3022 } 3023 Py_DECREF(iter); 3024 3025 /* If the object is already in the memo, this means it is 3026 recursive. In this case, throw away everything we put on the 3027 stack, and fetch the object back from the memo. 
*/ 3028 if (PyMemoTable_Get(self->memo, obj)) { 3029 const char pop_mark_op = POP_MARK; 3030 3031 if (_Pickler_Write(self, &pop_mark_op, 1) < 0) 3032 return -1; 3033 if (memo_get(self, obj) < 0) 3034 return -1; 3035 return 0; 3036 } 3037 3038 if (_Pickler_Write(self, &frozenset_op, 1) < 0) 3039 return -1; 3040 if (memo_put(self, obj) < 0) 3041 return -1; 3042 3043 return 0; 3044 } 3045 3046 static int 3047 fix_imports(PyObject **module_name, PyObject **global_name) 3048 { 3049 PyObject *key; 3050 PyObject *item; 3051 PickleState *st = _Pickle_GetGlobalState(); 3052 3053 key = PyTuple_Pack(2, *module_name, *global_name); 3054 if (key == NULL) 3055 return -1; 3056 item = PyDict_GetItemWithError(st->name_mapping_3to2, key); 3057 Py_DECREF(key); 3058 if (item) { 3059 PyObject *fixed_module_name; 3060 PyObject *fixed_global_name; 3061 3062 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { 3063 PyErr_Format(PyExc_RuntimeError, 3064 "_compat_pickle.REVERSE_NAME_MAPPING values " 3065 "should be 2-tuples, not %.200s", 3066 Py_TYPE(item)->tp_name); 3067 return -1; 3068 } 3069 fixed_module_name = PyTuple_GET_ITEM(item, 0); 3070 fixed_global_name = PyTuple_GET_ITEM(item, 1); 3071 if (!PyUnicode_Check(fixed_module_name) || 3072 !PyUnicode_Check(fixed_global_name)) { 3073 PyErr_Format(PyExc_RuntimeError, 3074 "_compat_pickle.REVERSE_NAME_MAPPING values " 3075 "should be pairs of str, not (%.200s, %.200s)", 3076 Py_TYPE(fixed_module_name)->tp_name, 3077 Py_TYPE(fixed_global_name)->tp_name); 3078 return -1; 3079 } 3080 3081 Py_CLEAR(*module_name); 3082 Py_CLEAR(*global_name); 3083 Py_INCREF(fixed_module_name); 3084 Py_INCREF(fixed_global_name); 3085 *module_name = fixed_module_name; 3086 *global_name = fixed_global_name; 3087 return 0; 3088 } 3089 else if (PyErr_Occurred()) { 3090 return -1; 3091 } 3092 3093 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name); 3094 if (item) { 3095 if (!PyUnicode_Check(item)) { 3096 PyErr_Format(PyExc_RuntimeError, 3097 
"_compat_pickle.REVERSE_IMPORT_MAPPING values " 3098 "should be strings, not %.200s", 3099 Py_TYPE(item)->tp_name); 3100 return -1; 3101 } 3102 Py_INCREF(item); 3103 Py_XSETREF(*module_name, item); 3104 } 3105 else if (PyErr_Occurred()) { 3106 return -1; 3107 } 3108 3109 return 0; 3110 } 3111 3112 static int 3113 save_global(PicklerObject *self, PyObject *obj, PyObject *name) 3114 { 3115 PyObject *global_name = NULL; 3116 PyObject *module_name = NULL; 3117 PyObject *module = NULL; 3118 PyObject *parent = NULL; 3119 PyObject *dotted_path = NULL; 3120 PyObject *lastname = NULL; 3121 PyObject *cls; 3122 PickleState *st = _Pickle_GetGlobalState(); 3123 int status = 0; 3124 _Py_IDENTIFIER(__name__); 3125 _Py_IDENTIFIER(__qualname__); 3126 3127 const char global_op = GLOBAL; 3128 3129 if (name) { 3130 Py_INCREF(name); 3131 global_name = name; 3132 } 3133 else { 3134 global_name = _PyObject_GetAttrId(obj, &PyId___qualname__); 3135 if (global_name == NULL) { 3136 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) 3137 goto error; 3138 PyErr_Clear(); 3139 } 3140 if (global_name == NULL) { 3141 global_name = _PyObject_GetAttrId(obj, &PyId___name__); 3142 if (global_name == NULL) 3143 goto error; 3144 } 3145 } 3146 3147 dotted_path = get_dotted_path(module, global_name); 3148 if (dotted_path == NULL) 3149 goto error; 3150 module_name = whichmodule(obj, dotted_path); 3151 if (module_name == NULL) 3152 goto error; 3153 3154 /* XXX: Change to use the import C API directly with level=0 to disallow 3155 relative imports. 3156 3157 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses 3158 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore 3159 custom import functions (IMHO, this would be a nice security 3160 feature). The import C API would need to be extended to support the 3161 extra parameters of __import__ to fix that. 
*/ 3162 module = PyImport_Import(module_name); 3163 if (module == NULL) { 3164 PyErr_Format(st->PicklingError, 3165 "Can't pickle %R: import of module %R failed", 3166 obj, module_name); 3167 goto error; 3168 } 3169 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1); 3170 Py_INCREF(lastname); 3171 cls = get_deep_attribute(module, dotted_path, &parent); 3172 Py_CLEAR(dotted_path); 3173 if (cls == NULL) { 3174 PyErr_Format(st->PicklingError, 3175 "Can't pickle %R: attribute lookup %S on %S failed", 3176 obj, global_name, module_name); 3177 goto error; 3178 } 3179 if (cls != obj) { 3180 Py_DECREF(cls); 3181 PyErr_Format(st->PicklingError, 3182 "Can't pickle %R: it's not the same object as %S.%S", 3183 obj, module_name, global_name); 3184 goto error; 3185 } 3186 Py_DECREF(cls); 3187 3188 if (self->proto >= 2) { 3189 /* See whether this is in the extension registry, and if 3190 * so generate an EXT opcode. 3191 */ 3192 PyObject *extension_key; 3193 PyObject *code_obj; /* extension code as Python object */ 3194 long code; /* extension code as C value */ 3195 char pdata[5]; 3196 Py_ssize_t n; 3197 3198 extension_key = PyTuple_Pack(2, module_name, global_name); 3199 if (extension_key == NULL) { 3200 goto error; 3201 } 3202 code_obj = PyDict_GetItemWithError(st->extension_registry, 3203 extension_key); 3204 Py_DECREF(extension_key); 3205 /* The object is not registered in the extension registry. 3206 This is the most likely code path. */ 3207 if (code_obj == NULL) { 3208 if (PyErr_Occurred()) { 3209 goto error; 3210 } 3211 goto gen_global; 3212 } 3213 3214 /* XXX: pickle.py doesn't check neither the type, nor the range 3215 of the value returned by the extension_registry. It should for 3216 consistency. */ 3217 3218 /* Verify code_obj has the right type and value. 
*/ 3219 if (!PyLong_Check(code_obj)) { 3220 PyErr_Format(st->PicklingError, 3221 "Can't pickle %R: extension code %R isn't an integer", 3222 obj, code_obj); 3223 goto error; 3224 } 3225 code = PyLong_AS_LONG(code_obj); 3226 if (code <= 0 || code > 0x7fffffffL) { 3227 if (!PyErr_Occurred()) 3228 PyErr_Format(st->PicklingError, "Can't pickle %R: extension " 3229 "code %ld is out of range", obj, code); 3230 goto error; 3231 } 3232 3233 /* Generate an EXT opcode. */ 3234 if (code <= 0xff) { 3235 pdata[0] = EXT1; 3236 pdata[1] = (unsigned char)code; 3237 n = 2; 3238 } 3239 else if (code <= 0xffff) { 3240 pdata[0] = EXT2; 3241 pdata[1] = (unsigned char)(code & 0xff); 3242 pdata[2] = (unsigned char)((code >> 8) & 0xff); 3243 n = 3; 3244 } 3245 else { 3246 pdata[0] = EXT4; 3247 pdata[1] = (unsigned char)(code & 0xff); 3248 pdata[2] = (unsigned char)((code >> 8) & 0xff); 3249 pdata[3] = (unsigned char)((code >> 16) & 0xff); 3250 pdata[4] = (unsigned char)((code >> 24) & 0xff); 3251 n = 5; 3252 } 3253 3254 if (_Pickler_Write(self, pdata, n) < 0) 3255 goto error; 3256 } 3257 else { 3258 gen_global: 3259 if (parent == module) { 3260 Py_INCREF(lastname); 3261 Py_DECREF(global_name); 3262 global_name = lastname; 3263 } 3264 if (self->proto >= 4) { 3265 const char stack_global_op = STACK_GLOBAL; 3266 3267 if (save(self, module_name, 0) < 0) 3268 goto error; 3269 if (save(self, global_name, 0) < 0) 3270 goto error; 3271 3272 if (_Pickler_Write(self, &stack_global_op, 1) < 0) 3273 goto error; 3274 } 3275 else if (parent != module) { 3276 PickleState *st = _Pickle_GetGlobalState(); 3277 PyObject *reduce_value = Py_BuildValue("(O(OO))", 3278 st->getattr, parent, lastname); 3279 status = save_reduce(self, reduce_value, NULL); 3280 Py_DECREF(reduce_value); 3281 if (status < 0) 3282 goto error; 3283 } 3284 else { 3285 /* Generate a normal global opcode if we are using a pickle 3286 protocol < 4, or if the object is not registered in the 3287 extension registry. 
*/ 3288 PyObject *encoded; 3289 PyObject *(*unicode_encoder)(PyObject *); 3290 3291 if (_Pickler_Write(self, &global_op, 1) < 0) 3292 goto error; 3293 3294 /* For protocol < 3 and if the user didn't request against doing 3295 so, we convert module names to the old 2.x module names. */ 3296 if (self->proto < 3 && self->fix_imports) { 3297 if (fix_imports(&module_name, &global_name) < 0) { 3298 goto error; 3299 } 3300 } 3301 3302 /* Since Python 3.0 now supports non-ASCII identifiers, we encode 3303 both the module name and the global name using UTF-8. We do so 3304 only when we are using the pickle protocol newer than version 3305 3. This is to ensure compatibility with older Unpickler running 3306 on Python 2.x. */ 3307 if (self->proto == 3) { 3308 unicode_encoder = PyUnicode_AsUTF8String; 3309 } 3310 else { 3311 unicode_encoder = PyUnicode_AsASCIIString; 3312 } 3313 encoded = unicode_encoder(module_name); 3314 if (encoded == NULL) { 3315 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) 3316 PyErr_Format(st->PicklingError, 3317 "can't pickle module identifier '%S' using " 3318 "pickle protocol %i", 3319 module_name, self->proto); 3320 goto error; 3321 } 3322 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), 3323 PyBytes_GET_SIZE(encoded)) < 0) { 3324 Py_DECREF(encoded); 3325 goto error; 3326 } 3327 Py_DECREF(encoded); 3328 if(_Pickler_Write(self, "\n", 1) < 0) 3329 goto error; 3330 3331 /* Save the name of the module. 
*/ 3332 encoded = unicode_encoder(global_name); 3333 if (encoded == NULL) { 3334 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) 3335 PyErr_Format(st->PicklingError, 3336 "can't pickle global identifier '%S' using " 3337 "pickle protocol %i", 3338 global_name, self->proto); 3339 goto error; 3340 } 3341 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), 3342 PyBytes_GET_SIZE(encoded)) < 0) { 3343 Py_DECREF(encoded); 3344 goto error; 3345 } 3346 Py_DECREF(encoded); 3347 if (_Pickler_Write(self, "\n", 1) < 0) 3348 goto error; 3349 } 3350 /* Memoize the object. */ 3351 if (memo_put(self, obj) < 0) 3352 goto error; 3353 } 3354 3355 if (0) { 3356 error: 3357 status = -1; 3358 } 3359 Py_XDECREF(module_name); 3360 Py_XDECREF(global_name); 3361 Py_XDECREF(module); 3362 Py_XDECREF(parent); 3363 Py_XDECREF(dotted_path); 3364 Py_XDECREF(lastname); 3365 3366 return status; 3367 } 3368 3369 static int 3370 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton) 3371 { 3372 PyObject *reduce_value; 3373 int status; 3374 3375 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton); 3376 if (reduce_value == NULL) { 3377 return -1; 3378 } 3379 status = save_reduce(self, reduce_value, obj); 3380 Py_DECREF(reduce_value); 3381 return status; 3382 } 3383 3384 static int 3385 save_type(PicklerObject *self, PyObject *obj) 3386 { 3387 if (obj == (PyObject *)&_PyNone_Type) { 3388 return save_singleton_type(self, obj, Py_None); 3389 } 3390 else if (obj == (PyObject *)&PyEllipsis_Type) { 3391 return save_singleton_type(self, obj, Py_Ellipsis); 3392 } 3393 else if (obj == (PyObject *)&_PyNotImplemented_Type) { 3394 return save_singleton_type(self, obj, Py_NotImplemented); 3395 } 3396 return save_global(self, obj, NULL); 3397 } 3398 3399 static int 3400 save_pers(PicklerObject *self, PyObject *obj, PyObject *func) 3401 { 3402 PyObject *pid = NULL; 3403 int status = 0; 3404 3405 const char persid_op = PERSID; 3406 const char binpersid_op = BINPERSID; 3407 3408 
Py_INCREF(obj); 3409 pid = _Pickle_FastCall(func, obj); 3410 if (pid == NULL) 3411 return -1; 3412 3413 if (pid != Py_None) { 3414 if (self->bin) { 3415 if (save(self, pid, 1) < 0 || 3416 _Pickler_Write(self, &binpersid_op, 1) < 0) 3417 goto error; 3418 } 3419 else { 3420 PyObject *pid_str; 3421 3422 pid_str = PyObject_Str(pid); 3423 if (pid_str == NULL) 3424 goto error; 3425 3426 /* XXX: Should it check whether the pid contains embedded 3427 newlines? */ 3428 if (!PyUnicode_IS_ASCII(pid_str)) { 3429 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError, 3430 "persistent IDs in protocol 0 must be " 3431 "ASCII strings"); 3432 Py_DECREF(pid_str); 3433 goto error; 3434 } 3435 3436 if (_Pickler_Write(self, &persid_op, 1) < 0 || 3437 _Pickler_Write(self, PyUnicode_DATA(pid_str), 3438 PyUnicode_GET_LENGTH(pid_str)) < 0 || 3439 _Pickler_Write(self, "\n", 1) < 0) { 3440 Py_DECREF(pid_str); 3441 goto error; 3442 } 3443 Py_DECREF(pid_str); 3444 } 3445 status = 1; 3446 } 3447 3448 if (0) { 3449 error: 3450 status = -1; 3451 } 3452 Py_XDECREF(pid); 3453 3454 return status; 3455 } 3456 3457 static PyObject * 3458 get_class(PyObject *obj) 3459 { 3460 PyObject *cls; 3461 _Py_IDENTIFIER(__class__); 3462 3463 cls = _PyObject_GetAttrId(obj, &PyId___class__); 3464 if (cls == NULL) { 3465 if (PyErr_ExceptionMatches(PyExc_AttributeError)) { 3466 PyErr_Clear(); 3467 cls = (PyObject *) Py_TYPE(obj); 3468 Py_INCREF(cls); 3469 } 3470 } 3471 return cls; 3472 } 3473 3474 /* We're saving obj, and args is the 2-thru-5 tuple returned by the 3475 * appropriate __reduce__ method for obj. 
3476 */ 3477 static int 3478 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) 3479 { 3480 PyObject *callable; 3481 PyObject *argtup; 3482 PyObject *state = NULL; 3483 PyObject *listitems = Py_None; 3484 PyObject *dictitems = Py_None; 3485 PickleState *st = _Pickle_GetGlobalState(); 3486 Py_ssize_t size; 3487 int use_newobj = 0, use_newobj_ex = 0; 3488 3489 const char reduce_op = REDUCE; 3490 const char build_op = BUILD; 3491 const char newobj_op = NEWOBJ; 3492 const char newobj_ex_op = NEWOBJ_EX; 3493 3494 size = PyTuple_Size(args); 3495 if (size < 2 || size > 5) { 3496 PyErr_SetString(st->PicklingError, "tuple returned by " 3497 "__reduce__ must contain 2 through 5 elements"); 3498 return -1; 3499 } 3500 3501 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5, 3502 &callable, &argtup, &state, &listitems, &dictitems)) 3503 return -1; 3504 3505 if (!PyCallable_Check(callable)) { 3506 PyErr_SetString(st->PicklingError, "first item of the tuple " 3507 "returned by __reduce__ must be callable"); 3508 return -1; 3509 } 3510 if (!PyTuple_Check(argtup)) { 3511 PyErr_SetString(st->PicklingError, "second item of the tuple " 3512 "returned by __reduce__ must be a tuple"); 3513 return -1; 3514 } 3515 3516 if (state == Py_None) 3517 state = NULL; 3518 3519 if (listitems == Py_None) 3520 listitems = NULL; 3521 else if (!PyIter_Check(listitems)) { 3522 PyErr_Format(st->PicklingError, "fourth element of the tuple " 3523 "returned by __reduce__ must be an iterator, not %s", 3524 Py_TYPE(listitems)->tp_name); 3525 return -1; 3526 } 3527 3528 if (dictitems == Py_None) 3529 dictitems = NULL; 3530 else if (!PyIter_Check(dictitems)) { 3531 PyErr_Format(st->PicklingError, "fifth element of the tuple " 3532 "returned by __reduce__ must be an iterator, not %s", 3533 Py_TYPE(dictitems)->tp_name); 3534 return -1; 3535 } 3536 3537 if (self->proto >= 2) { 3538 PyObject *name; 3539 _Py_IDENTIFIER(__name__); 3540 3541 name = _PyObject_GetAttrId(callable, &PyId___name__); 3542 if 
(name == NULL) { 3543 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { 3544 return -1; 3545 } 3546 PyErr_Clear(); 3547 } 3548 else if (PyUnicode_Check(name)) { 3549 _Py_IDENTIFIER(__newobj_ex__); 3550 use_newobj_ex = _PyUnicode_EqualToASCIIId( 3551 name, &PyId___newobj_ex__); 3552 if (!use_newobj_ex) { 3553 _Py_IDENTIFIER(__newobj__); 3554 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__); 3555 } 3556 } 3557 Py_XDECREF(name); 3558 } 3559 3560 if (use_newobj_ex) { 3561 PyObject *cls; 3562 PyObject *args; 3563 PyObject *kwargs; 3564 3565 if (Py_SIZE(argtup) != 3) { 3566 PyErr_Format(st->PicklingError, 3567 "length of the NEWOBJ_EX argument tuple must be " 3568 "exactly 3, not %zd", Py_SIZE(argtup)); 3569 return -1; 3570 } 3571 3572 cls = PyTuple_GET_ITEM(argtup, 0); 3573 if (!PyType_Check(cls)) { 3574 PyErr_Format(st->PicklingError, 3575 "first item from NEWOBJ_EX argument tuple must " 3576 "be a class, not %.200s", Py_TYPE(cls)->tp_name); 3577 return -1; 3578 } 3579 args = PyTuple_GET_ITEM(argtup, 1); 3580 if (!PyTuple_Check(args)) { 3581 PyErr_Format(st->PicklingError, 3582 "second item from NEWOBJ_EX argument tuple must " 3583 "be a tuple, not %.200s", Py_TYPE(args)->tp_name); 3584 return -1; 3585 } 3586 kwargs = PyTuple_GET_ITEM(argtup, 2); 3587 if (!PyDict_Check(kwargs)) { 3588 PyErr_Format(st->PicklingError, 3589 "third item from NEWOBJ_EX argument tuple must " 3590 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name); 3591 return -1; 3592 } 3593 3594 if (self->proto >= 4) { 3595 if (save(self, cls, 0) < 0 || 3596 save(self, args, 0) < 0 || 3597 save(self, kwargs, 0) < 0 || 3598 _Pickler_Write(self, &newobj_ex_op, 1) < 0) { 3599 return -1; 3600 } 3601 } 3602 else { 3603 PyObject *newargs; 3604 PyObject *cls_new; 3605 Py_ssize_t i; 3606 _Py_IDENTIFIER(__new__); 3607 3608 newargs = PyTuple_New(Py_SIZE(args) + 2); 3609 if (newargs == NULL) 3610 return -1; 3611 3612 cls_new = _PyObject_GetAttrId(cls, &PyId___new__); 3613 if (cls_new == NULL) { 3614 
Py_DECREF(newargs); 3615 return -1; 3616 } 3617 PyTuple_SET_ITEM(newargs, 0, cls_new); 3618 Py_INCREF(cls); 3619 PyTuple_SET_ITEM(newargs, 1, cls); 3620 for (i = 0; i < Py_SIZE(args); i++) { 3621 PyObject *item = PyTuple_GET_ITEM(args, i); 3622 Py_INCREF(item); 3623 PyTuple_SET_ITEM(newargs, i + 2, item); 3624 } 3625 3626 callable = PyObject_Call(st->partial, newargs, kwargs); 3627 Py_DECREF(newargs); 3628 if (callable == NULL) 3629 return -1; 3630 3631 newargs = PyTuple_New(0); 3632 if (newargs == NULL) { 3633 Py_DECREF(callable); 3634 return -1; 3635 } 3636 3637 if (save(self, callable, 0) < 0 || 3638 save(self, newargs, 0) < 0 || 3639 _Pickler_Write(self, &reduce_op, 1) < 0) { 3640 Py_DECREF(newargs); 3641 Py_DECREF(callable); 3642 return -1; 3643 } 3644 Py_DECREF(newargs); 3645 Py_DECREF(callable); 3646 } 3647 } 3648 else if (use_newobj) { 3649 PyObject *cls; 3650 PyObject *newargtup; 3651 PyObject *obj_class; 3652 int p; 3653 3654 /* Sanity checks. */ 3655 if (Py_SIZE(argtup) < 1) { 3656 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty"); 3657 return -1; 3658 } 3659 3660 cls = PyTuple_GET_ITEM(argtup, 0); 3661 if (!PyType_Check(cls)) { 3662 PyErr_SetString(st->PicklingError, "args[0] from " 3663 "__newobj__ args is not a type"); 3664 return -1; 3665 } 3666 3667 if (obj != NULL) { 3668 obj_class = get_class(obj); 3669 p = obj_class != cls; /* true iff a problem */ 3670 Py_DECREF(obj_class); 3671 if (p) { 3672 PyErr_SetString(st->PicklingError, "args[0] from " 3673 "__newobj__ args has the wrong class"); 3674 return -1; 3675 } 3676 } 3677 /* XXX: These calls save() are prone to infinite recursion. Imagine 3678 what happen if the value returned by the __reduce__() method of 3679 some extension type contains another object of the same type. Ouch! 
3680 3681 Here is a quick example, that I ran into, to illustrate what I 3682 mean: 3683 3684 >>> import pickle, copyreg 3685 >>> copyreg.dispatch_table.pop(complex) 3686 >>> pickle.dumps(1+2j) 3687 Traceback (most recent call last): 3688 ... 3689 RecursionError: maximum recursion depth exceeded 3690 3691 Removing the complex class from copyreg.dispatch_table made the 3692 __reduce_ex__() method emit another complex object: 3693 3694 >>> (1+1j).__reduce_ex__(2) 3695 (<function __newobj__ at 0xb7b71c3c>, 3696 (<class 'complex'>, (1+1j)), None, None, None) 3697 3698 Thus when save() was called on newargstup (the 2nd item) recursion 3699 ensued. Of course, the bug was in the complex class which had a 3700 broken __getnewargs__() that emitted another complex object. But, 3701 the point, here, is it is quite easy to end up with a broken reduce 3702 function. */ 3703 3704 /* Save the class and its __new__ arguments. */ 3705 if (save(self, cls, 0) < 0) 3706 return -1; 3707 3708 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup)); 3709 if (newargtup == NULL) 3710 return -1; 3711 3712 p = save(self, newargtup, 0); 3713 Py_DECREF(newargtup); 3714 if (p < 0) 3715 return -1; 3716 3717 /* Add NEWOBJ opcode. */ 3718 if (_Pickler_Write(self, &newobj_op, 1) < 0) 3719 return -1; 3720 } 3721 else { /* Not using NEWOBJ. */ 3722 if (save(self, callable, 0) < 0 || 3723 save(self, argtup, 0) < 0 || 3724 _Pickler_Write(self, &reduce_op, 1) < 0) 3725 return -1; 3726 } 3727 3728 /* obj can be NULL when save_reduce() is used directly. A NULL obj means 3729 the caller do not want to memoize the object. Not particularly useful, 3730 but that is to mimic the behavior save_reduce() in pickle.py when 3731 obj is None. */ 3732 if (obj != NULL) { 3733 /* If the object is already in the memo, this means it is 3734 recursive. In this case, throw away everything we put on the 3735 stack, and fetch the object back from the memo. 
*/ 3736 if (PyMemoTable_Get(self->memo, obj)) { 3737 const char pop_op = POP; 3738 3739 if (_Pickler_Write(self, &pop_op, 1) < 0) 3740 return -1; 3741 if (memo_get(self, obj) < 0) 3742 return -1; 3743 3744 return 0; 3745 } 3746 else if (memo_put(self, obj) < 0) 3747 return -1; 3748 } 3749 3750 if (listitems && batch_list(self, listitems) < 0) 3751 return -1; 3752 3753 if (dictitems && batch_dict(self, dictitems) < 0) 3754 return -1; 3755 3756 if (state) { 3757 if (save(self, state, 0) < 0 || 3758 _Pickler_Write(self, &build_op, 1) < 0) 3759 return -1; 3760 } 3761 3762 return 0; 3763 } 3764 3765 static int 3766 save(PicklerObject *self, PyObject *obj, int pers_save) 3767 { 3768 PyTypeObject *type; 3769 PyObject *reduce_func = NULL; 3770 PyObject *reduce_value = NULL; 3771 int status = 0; 3772 3773 if (_Pickler_OpcodeBoundary(self) < 0) 3774 return -1; 3775 3776 if (Py_EnterRecursiveCall(" while pickling an object")) 3777 return -1; 3778 3779 /* The extra pers_save argument is necessary to avoid calling save_pers() 3780 on its returned object. */ 3781 if (!pers_save && self->pers_func) { 3782 /* save_pers() returns: 3783 -1 to signal an error; 3784 0 if it did nothing successfully; 3785 1 if a persistent id was saved. 3786 */ 3787 if ((status = save_pers(self, obj, self->pers_func)) != 0) 3788 goto done; 3789 } 3790 3791 type = Py_TYPE(obj); 3792 3793 /* The old cPickle had an optimization that used switch-case statement 3794 dispatching on the first letter of the type name. This has was removed 3795 since benchmarks shown that this optimization was actually slowing 3796 things down. */ 3797 3798 /* Atom types; these aren't memoized, so don't check the memo. 
*/ 3799 3800 if (obj == Py_None) { 3801 status = save_none(self, obj); 3802 goto done; 3803 } 3804 else if (obj == Py_False || obj == Py_True) { 3805 status = save_bool(self, obj); 3806 goto done; 3807 } 3808 else if (type == &PyLong_Type) { 3809 status = save_long(self, obj); 3810 goto done; 3811 } 3812 else if (type == &PyFloat_Type) { 3813 status = save_float(self, obj); 3814 goto done; 3815 } 3816 3817 /* Check the memo to see if it has the object. If so, generate 3818 a GET (or BINGET) opcode, instead of pickling the object 3819 once again. */ 3820 if (PyMemoTable_Get(self->memo, obj)) { 3821 if (memo_get(self, obj) < 0) 3822 goto error; 3823 goto done; 3824 } 3825 3826 if (type == &PyBytes_Type) { 3827 status = save_bytes(self, obj); 3828 goto done; 3829 } 3830 else if (type == &PyUnicode_Type) { 3831 status = save_unicode(self, obj); 3832 goto done; 3833 } 3834 else if (type == &PyDict_Type) { 3835 status = save_dict(self, obj); 3836 goto done; 3837 } 3838 else if (type == &PySet_Type) { 3839 status = save_set(self, obj); 3840 goto done; 3841 } 3842 else if (type == &PyFrozenSet_Type) { 3843 status = save_frozenset(self, obj); 3844 goto done; 3845 } 3846 else if (type == &PyList_Type) { 3847 status = save_list(self, obj); 3848 goto done; 3849 } 3850 else if (type == &PyTuple_Type) { 3851 status = save_tuple(self, obj); 3852 goto done; 3853 } 3854 else if (type == &PyType_Type) { 3855 status = save_type(self, obj); 3856 goto done; 3857 } 3858 else if (type == &PyFunction_Type) { 3859 status = save_global(self, obj, NULL); 3860 goto done; 3861 } 3862 3863 /* XXX: This part needs some unit tests. */ 3864 3865 /* Get a reduction callable, and call it. This may come from 3866 * self.dispatch_table, copyreg.dispatch_table, the object's 3867 * __reduce_ex__ method, or the object's __reduce__ method. 
3868 */ 3869 if (self->dispatch_table == NULL) { 3870 PickleState *st = _Pickle_GetGlobalState(); 3871 reduce_func = PyDict_GetItemWithError(st->dispatch_table, 3872 (PyObject *)type); 3873 if (reduce_func == NULL) { 3874 if (PyErr_Occurred()) { 3875 goto error; 3876 } 3877 } else { 3878 /* PyDict_GetItemWithError() returns a borrowed reference. 3879 Increase the reference count to be consistent with 3880 PyObject_GetItem and _PyObject_GetAttrId used below. */ 3881 Py_INCREF(reduce_func); 3882 } 3883 } else { 3884 reduce_func = PyObject_GetItem(self->dispatch_table, 3885 (PyObject *)type); 3886 if (reduce_func == NULL) { 3887 if (PyErr_ExceptionMatches(PyExc_KeyError)) 3888 PyErr_Clear(); 3889 else 3890 goto error; 3891 } 3892 } 3893 if (reduce_func != NULL) { 3894 Py_INCREF(obj); 3895 reduce_value = _Pickle_FastCall(reduce_func, obj); 3896 } 3897 else if (PyType_IsSubtype(type, &PyType_Type)) { 3898 status = save_global(self, obj, NULL); 3899 goto done; 3900 } 3901 else { 3902 _Py_IDENTIFIER(__reduce__); 3903 _Py_IDENTIFIER(__reduce_ex__); 3904 3905 3906 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is 3907 automatically defined as __reduce__. While this is convenient, this 3908 make it impossible to know which method was actually called. Of 3909 course, this is not a big deal. But still, it would be nice to let 3910 the user know which method was called when something go 3911 wrong. Incidentally, this means if __reduce_ex__ is not defined, we 3912 don't actually have to check for a __reduce__ method. */ 3913 3914 /* Check for a __reduce_ex__ method. 
*/ 3915 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce_ex__); 3916 if (reduce_func != NULL) { 3917 PyObject *proto; 3918 proto = PyLong_FromLong(self->proto); 3919 if (proto != NULL) { 3920 reduce_value = _Pickle_FastCall(reduce_func, proto); 3921 } 3922 } 3923 else { 3924 PickleState *st = _Pickle_GetGlobalState(); 3925 3926 if (PyErr_ExceptionMatches(PyExc_AttributeError)) { 3927 PyErr_Clear(); 3928 } 3929 else { 3930 goto error; 3931 } 3932 /* Check for a __reduce__ method. */ 3933 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__); 3934 if (reduce_func != NULL) { 3935 reduce_value = _PyObject_CallNoArg(reduce_func); 3936 } 3937 else { 3938 PyErr_Format(st->PicklingError, 3939 "can't pickle '%.200s' object: %R", 3940 type->tp_name, obj); 3941 goto error; 3942 } 3943 } 3944 } 3945 3946 if (reduce_value == NULL) 3947 goto error; 3948 3949 if (PyUnicode_Check(reduce_value)) { 3950 status = save_global(self, obj, reduce_value); 3951 goto done; 3952 } 3953 3954 if (!PyTuple_Check(reduce_value)) { 3955 PickleState *st = _Pickle_GetGlobalState(); 3956 PyErr_SetString(st->PicklingError, 3957 "__reduce__ must return a string or tuple"); 3958 goto error; 3959 } 3960 3961 status = save_reduce(self, reduce_value, obj); 3962 3963 if (0) { 3964 error: 3965 status = -1; 3966 } 3967 done: 3968 3969 Py_LeaveRecursiveCall(); 3970 Py_XDECREF(reduce_func); 3971 Py_XDECREF(reduce_value); 3972 3973 return status; 3974 } 3975 3976 static int 3977 dump(PicklerObject *self, PyObject *obj) 3978 { 3979 const char stop_op = STOP; 3980 3981 if (self->proto >= 2) { 3982 char header[2]; 3983 3984 header[0] = PROTO; 3985 assert(self->proto >= 0 && self->proto < 256); 3986 header[1] = (unsigned char)self->proto; 3987 if (_Pickler_Write(self, header, 2) < 0) 3988 return -1; 3989 if (self->proto >= 4) 3990 self->framing = 1; 3991 } 3992 3993 if (save(self, obj, 0) < 0 || 3994 _Pickler_Write(self, &stop_op, 1) < 0) 3995 return -1; 3996 3997 return 0; 3998 } 3999 4000 /*[clinic input] 
4001 4002 _pickle.Pickler.clear_memo 4003 4004 Clears the pickler's "memo". 4005 4006 The memo is the data structure that remembers which objects the 4007 pickler has already seen, so that shared or recursive objects are 4008 pickled by reference and not by value. This method is useful when 4009 re-using picklers. 4010 [clinic start generated code]*/ 4011 4012 static PyObject * 4013 _pickle_Pickler_clear_memo_impl(PicklerObject *self) 4014 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/ 4015 { 4016 if (self->memo) 4017 PyMemoTable_Clear(self->memo); 4018 4019 Py_RETURN_NONE; 4020 } 4021 4022 /*[clinic input] 4023 4024 _pickle.Pickler.dump 4025 4026 obj: object 4027 / 4028 4029 Write a pickled representation of the given object to the open file. 4030 [clinic start generated code]*/ 4031 4032 static PyObject * 4033 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj) 4034 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/ 4035 { 4036 /* Check whether the Pickler was initialized correctly (issue3664). 4037 Developers often forget to call __init__() in their subclasses, which 4038 would trigger a segfault without this check. */ 4039 if (self->write == NULL) { 4040 PickleState *st = _Pickle_GetGlobalState(); 4041 PyErr_Format(st->PicklingError, 4042 "Pickler.__init__() was not called by %s.__init__()", 4043 Py_TYPE(self)->tp_name); 4044 return NULL; 4045 } 4046 4047 if (_Pickler_ClearBuffer(self) < 0) 4048 return NULL; 4049 4050 if (dump(self, obj) < 0) 4051 return NULL; 4052 4053 if (_Pickler_FlushToFile(self) < 0) 4054 return NULL; 4055 4056 Py_RETURN_NONE; 4057 } 4058 4059 /*[clinic input] 4060 4061 _pickle.Pickler.__sizeof__ -> Py_ssize_t 4062 4063 Returns size in memory, in bytes. 
4064 [clinic start generated code]*/ 4065 4066 static Py_ssize_t 4067 _pickle_Pickler___sizeof___impl(PicklerObject *self) 4068 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/ 4069 { 4070 Py_ssize_t res, s; 4071 4072 res = _PyObject_SIZE(Py_TYPE(self)); 4073 if (self->memo != NULL) { 4074 res += sizeof(PyMemoTable); 4075 res += self->memo->mt_allocated * sizeof(PyMemoEntry); 4076 } 4077 if (self->output_buffer != NULL) { 4078 s = _PySys_GetSizeOf(self->output_buffer); 4079 if (s == -1) 4080 return -1; 4081 res += s; 4082 } 4083 return res; 4084 } 4085 4086 static struct PyMethodDef Pickler_methods[] = { 4087 _PICKLE_PICKLER_DUMP_METHODDEF 4088 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF 4089 _PICKLE_PICKLER___SIZEOF___METHODDEF 4090 {NULL, NULL} /* sentinel */ 4091 }; 4092 4093 static void 4094 Pickler_dealloc(PicklerObject *self) 4095 { 4096 PyObject_GC_UnTrack(self); 4097 4098 Py_XDECREF(self->output_buffer); 4099 Py_XDECREF(self->write); 4100 Py_XDECREF(self->pers_func); 4101 Py_XDECREF(self->dispatch_table); 4102 Py_XDECREF(self->fast_memo); 4103 4104 PyMemoTable_Del(self->memo); 4105 4106 Py_TYPE(self)->tp_free((PyObject *)self); 4107 } 4108 4109 static int 4110 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg) 4111 { 4112 Py_VISIT(self->write); 4113 Py_VISIT(self->pers_func); 4114 Py_VISIT(self->dispatch_table); 4115 Py_VISIT(self->fast_memo); 4116 return 0; 4117 } 4118 4119 static int 4120 Pickler_clear(PicklerObject *self) 4121 { 4122 Py_CLEAR(self->output_buffer); 4123 Py_CLEAR(self->write); 4124 Py_CLEAR(self->pers_func); 4125 Py_CLEAR(self->dispatch_table); 4126 Py_CLEAR(self->fast_memo); 4127 4128 if (self->memo != NULL) { 4129 PyMemoTable *memo = self->memo; 4130 self->memo = NULL; 4131 PyMemoTable_Del(memo); 4132 } 4133 return 0; 4134 } 4135 4136 4137 /*[clinic input] 4138 4139 _pickle.Pickler.__init__ 4140 4141 file: object 4142 protocol: object = NULL 4143 fix_imports: bool = True 4144 4145 This takes a 
binary file for writing a pickle data stream. 4146 4147 The optional *protocol* argument tells the pickler to use the given 4148 protocol; supported protocols are 0, 1, 2, 3 and 4. The default 4149 protocol is 3; a backward-incompatible protocol designed for Python 3. 4150 4151 Specifying a negative protocol version selects the highest protocol 4152 version supported. The higher the protocol used, the more recent the 4153 version of Python needed to read the pickle produced. 4154 4155 The *file* argument must have a write() method that accepts a single 4156 bytes argument. It can thus be a file object opened for binary 4157 writing, an io.BytesIO instance, or any other custom object that meets 4158 this interface. 4159 4160 If *fix_imports* is True and protocol is less than 3, pickle will try 4161 to map the new Python 3 names to the old module names used in Python 4162 2, so that the pickle data stream is readable with Python 2. 4163 [clinic start generated code]*/ 4164 4165 static int 4166 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file, 4167 PyObject *protocol, int fix_imports) 4168 /*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/ 4169 { 4170 _Py_IDENTIFIER(persistent_id); 4171 _Py_IDENTIFIER(dispatch_table); 4172 4173 /* In case of multiple __init__() calls, clear previous content. 
*/ 4174 if (self->write != NULL) 4175 (void)Pickler_clear(self); 4176 4177 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0) 4178 return -1; 4179 4180 if (_Pickler_SetOutputStream(self, file) < 0) 4181 return -1; 4182 4183 /* memo and output_buffer may have already been created in _Pickler_New */ 4184 if (self->memo == NULL) { 4185 self->memo = PyMemoTable_New(); 4186 if (self->memo == NULL) 4187 return -1; 4188 } 4189 self->output_len = 0; 4190 if (self->output_buffer == NULL) { 4191 self->max_output_len = WRITE_BUF_SIZE; 4192 self->output_buffer = PyBytes_FromStringAndSize(NULL, 4193 self->max_output_len); 4194 if (self->output_buffer == NULL) 4195 return -1; 4196 } 4197 4198 self->fast = 0; 4199 self->fast_nesting = 0; 4200 self->fast_memo = NULL; 4201 self->pers_func = NULL; 4202 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) { 4203 self->pers_func = _PyObject_GetAttrId((PyObject *)self, 4204 &PyId_persistent_id); 4205 if (self->pers_func == NULL) 4206 return -1; 4207 } 4208 self->dispatch_table = NULL; 4209 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) { 4210 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self, 4211 &PyId_dispatch_table); 4212 if (self->dispatch_table == NULL) 4213 return -1; 4214 } 4215 4216 return 0; 4217 } 4218 4219 4220 /* Define a proxy object for the Pickler's internal memo object. This is to 4221 * avoid breaking code like: 4222 * pickler.memo.clear() 4223 * and 4224 * pickler.memo = saved_memo 4225 * Is this a good idea? Not really, but we don't want to break code that uses 4226 * it. Note that we don't implement the entire mapping API here. This is 4227 * intentional, as these should be treated as black-box implementation details. 4228 */ 4229 4230 /*[clinic input] 4231 _pickle.PicklerMemoProxy.clear 4232 4233 Remove all items from memo. 
4234 [clinic start generated code]*/ 4235 4236 static PyObject * 4237 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self) 4238 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/ 4239 { 4240 if (self->pickler->memo) 4241 PyMemoTable_Clear(self->pickler->memo); 4242 Py_RETURN_NONE; 4243 } 4244 4245 /*[clinic input] 4246 _pickle.PicklerMemoProxy.copy 4247 4248 Copy the memo to a new object. 4249 [clinic start generated code]*/ 4250 4251 static PyObject * 4252 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self) 4253 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/ 4254 { 4255 Py_ssize_t i; 4256 PyMemoTable *memo; 4257 PyObject *new_memo = PyDict_New(); 4258 if (new_memo == NULL) 4259 return NULL; 4260 4261 memo = self->pickler->memo; 4262 for (i = 0; i < memo->mt_allocated; ++i) { 4263 PyMemoEntry entry = memo->mt_table[i]; 4264 if (entry.me_key != NULL) { 4265 int status; 4266 PyObject *key, *value; 4267 4268 key = PyLong_FromVoidPtr(entry.me_key); 4269 value = Py_BuildValue("nO", entry.me_value, entry.me_key); 4270 4271 if (key == NULL || value == NULL) { 4272 Py_XDECREF(key); 4273 Py_XDECREF(value); 4274 goto error; 4275 } 4276 status = PyDict_SetItem(new_memo, key, value); 4277 Py_DECREF(key); 4278 Py_DECREF(value); 4279 if (status < 0) 4280 goto error; 4281 } 4282 } 4283 return new_memo; 4284 4285 error: 4286 Py_XDECREF(new_memo); 4287 return NULL; 4288 } 4289 4290 /*[clinic input] 4291 _pickle.PicklerMemoProxy.__reduce__ 4292 4293 Implement pickle support. 
4294 [clinic start generated code]*/ 4295 4296 static PyObject * 4297 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self) 4298 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/ 4299 { 4300 PyObject *reduce_value, *dict_args; 4301 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self); 4302 if (contents == NULL) 4303 return NULL; 4304 4305 reduce_value = PyTuple_New(2); 4306 if (reduce_value == NULL) { 4307 Py_DECREF(contents); 4308 return NULL; 4309 } 4310 dict_args = PyTuple_New(1); 4311 if (dict_args == NULL) { 4312 Py_DECREF(contents); 4313 Py_DECREF(reduce_value); 4314 return NULL; 4315 } 4316 PyTuple_SET_ITEM(dict_args, 0, contents); 4317 Py_INCREF((PyObject *)&PyDict_Type); 4318 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type); 4319 PyTuple_SET_ITEM(reduce_value, 1, dict_args); 4320 return reduce_value; 4321 } 4322 4323 static PyMethodDef picklerproxy_methods[] = { 4324 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF 4325 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF 4326 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF 4327 {NULL, NULL} /* sentinel */ 4328 }; 4329 4330 static void 4331 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self) 4332 { 4333 PyObject_GC_UnTrack(self); 4334 Py_XDECREF(self->pickler); 4335 PyObject_GC_Del((PyObject *)self); 4336 } 4337 4338 static int 4339 PicklerMemoProxy_traverse(PicklerMemoProxyObject *self, 4340 visitproc visit, void *arg) 4341 { 4342 Py_VISIT(self->pickler); 4343 return 0; 4344 } 4345 4346 static int 4347 PicklerMemoProxy_clear(PicklerMemoProxyObject *self) 4348 { 4349 Py_CLEAR(self->pickler); 4350 return 0; 4351 } 4352 4353 static PyTypeObject PicklerMemoProxyType = { 4354 PyVarObject_HEAD_INIT(NULL, 0) 4355 "_pickle.PicklerMemoProxy", /*tp_name*/ 4356 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/ 4357 0, 4358 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */ 4359 0, /* tp_print */ 4360 0, /* tp_getattr */ 4361 0, /* tp_setattr */ 4362 0, /* tp_compare 
*/ 4363 0, /* tp_repr */ 4364 0, /* tp_as_number */ 4365 0, /* tp_as_sequence */ 4366 0, /* tp_as_mapping */ 4367 PyObject_HashNotImplemented, /* tp_hash */ 4368 0, /* tp_call */ 4369 0, /* tp_str */ 4370 PyObject_GenericGetAttr, /* tp_getattro */ 4371 PyObject_GenericSetAttr, /* tp_setattro */ 4372 0, /* tp_as_buffer */ 4373 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 4374 0, /* tp_doc */ 4375 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */ 4376 (inquiry)PicklerMemoProxy_clear, /* tp_clear */ 4377 0, /* tp_richcompare */ 4378 0, /* tp_weaklistoffset */ 4379 0, /* tp_iter */ 4380 0, /* tp_iternext */ 4381 picklerproxy_methods, /* tp_methods */ 4382 }; 4383 4384 static PyObject * 4385 PicklerMemoProxy_New(PicklerObject *pickler) 4386 { 4387 PicklerMemoProxyObject *self; 4388 4389 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType); 4390 if (self == NULL) 4391 return NULL; 4392 Py_INCREF(pickler); 4393 self->pickler = pickler; 4394 PyObject_GC_Track(self); 4395 return (PyObject *)self; 4396 } 4397 4398 /*****************************************************************************/ 4399 4400 static PyObject * 4401 Pickler_get_memo(PicklerObject *self) 4402 { 4403 return PicklerMemoProxy_New(self); 4404 } 4405 4406 static int 4407 Pickler_set_memo(PicklerObject *self, PyObject *obj) 4408 { 4409 PyMemoTable *new_memo = NULL; 4410 4411 if (obj == NULL) { 4412 PyErr_SetString(PyExc_TypeError, 4413 "attribute deletion is not supported"); 4414 return -1; 4415 } 4416 4417 if (Py_TYPE(obj) == &PicklerMemoProxyType) { 4418 PicklerObject *pickler = 4419 ((PicklerMemoProxyObject *)obj)->pickler; 4420 4421 new_memo = PyMemoTable_Copy(pickler->memo); 4422 if (new_memo == NULL) 4423 return -1; 4424 } 4425 else if (PyDict_Check(obj)) { 4426 Py_ssize_t i = 0; 4427 PyObject *key, *value; 4428 4429 new_memo = PyMemoTable_New(); 4430 if (new_memo == NULL) 4431 return -1; 4432 4433 while (PyDict_Next(obj, &i, &key, &value)) { 4434 
Py_ssize_t memo_id; 4435 PyObject *memo_obj; 4436 4437 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) { 4438 PyErr_SetString(PyExc_TypeError, 4439 "'memo' values must be 2-item tuples"); 4440 goto error; 4441 } 4442 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0)); 4443 if (memo_id == -1 && PyErr_Occurred()) 4444 goto error; 4445 memo_obj = PyTuple_GET_ITEM(value, 1); 4446 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0) 4447 goto error; 4448 } 4449 } 4450 else { 4451 PyErr_Format(PyExc_TypeError, 4452 "'memo' attribute must be a PicklerMemoProxy object" 4453 "or dict, not %.200s", Py_TYPE(obj)->tp_name); 4454 return -1; 4455 } 4456 4457 PyMemoTable_Del(self->memo); 4458 self->memo = new_memo; 4459 4460 return 0; 4461 4462 error: 4463 if (new_memo) 4464 PyMemoTable_Del(new_memo); 4465 return -1; 4466 } 4467 4468 static PyObject * 4469 Pickler_get_persid(PicklerObject *self) 4470 { 4471 if (self->pers_func == NULL) 4472 PyErr_SetString(PyExc_AttributeError, "persistent_id"); 4473 else 4474 Py_INCREF(self->pers_func); 4475 return self->pers_func; 4476 } 4477 4478 static int 4479 Pickler_set_persid(PicklerObject *self, PyObject *value) 4480 { 4481 if (value == NULL) { 4482 PyErr_SetString(PyExc_TypeError, 4483 "attribute deletion is not supported"); 4484 return -1; 4485 } 4486 if (!PyCallable_Check(value)) { 4487 PyErr_SetString(PyExc_TypeError, 4488 "persistent_id must be a callable taking one argument"); 4489 return -1; 4490 } 4491 4492 Py_INCREF(value); 4493 Py_XSETREF(self->pers_func, value); 4494 4495 return 0; 4496 } 4497 4498 static PyMemberDef Pickler_members[] = { 4499 {"bin", T_INT, offsetof(PicklerObject, bin)}, 4500 {"fast", T_INT, offsetof(PicklerObject, fast)}, 4501 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)}, 4502 {NULL} 4503 }; 4504 4505 static PyGetSetDef Pickler_getsets[] = { 4506 {"memo", (getter)Pickler_get_memo, 4507 (setter)Pickler_set_memo}, 4508 {"persistent_id", (getter)Pickler_get_persid, 4509 
(setter)Pickler_set_persid}, 4510 {NULL} 4511 }; 4512 4513 static PyTypeObject Pickler_Type = { 4514 PyVarObject_HEAD_INIT(NULL, 0) 4515 "_pickle.Pickler" , /*tp_name*/ 4516 sizeof(PicklerObject), /*tp_basicsize*/ 4517 0, /*tp_itemsize*/ 4518 (destructor)Pickler_dealloc, /*tp_dealloc*/ 4519 0, /*tp_print*/ 4520 0, /*tp_getattr*/ 4521 0, /*tp_setattr*/ 4522 0, /*tp_reserved*/ 4523 0, /*tp_repr*/ 4524 0, /*tp_as_number*/ 4525 0, /*tp_as_sequence*/ 4526 0, /*tp_as_mapping*/ 4527 0, /*tp_hash*/ 4528 0, /*tp_call*/ 4529 0, /*tp_str*/ 4530 0, /*tp_getattro*/ 4531 0, /*tp_setattro*/ 4532 0, /*tp_as_buffer*/ 4533 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 4534 _pickle_Pickler___init____doc__, /*tp_doc*/ 4535 (traverseproc)Pickler_traverse, /*tp_traverse*/ 4536 (inquiry)Pickler_clear, /*tp_clear*/ 4537 0, /*tp_richcompare*/ 4538 0, /*tp_weaklistoffset*/ 4539 0, /*tp_iter*/ 4540 0, /*tp_iternext*/ 4541 Pickler_methods, /*tp_methods*/ 4542 Pickler_members, /*tp_members*/ 4543 Pickler_getsets, /*tp_getset*/ 4544 0, /*tp_base*/ 4545 0, /*tp_dict*/ 4546 0, /*tp_descr_get*/ 4547 0, /*tp_descr_set*/ 4548 0, /*tp_dictoffset*/ 4549 _pickle_Pickler___init__, /*tp_init*/ 4550 PyType_GenericAlloc, /*tp_alloc*/ 4551 PyType_GenericNew, /*tp_new*/ 4552 PyObject_GC_Del, /*tp_free*/ 4553 0, /*tp_is_gc*/ 4554 }; 4555 4556 /* Temporary helper for calling self.find_class(). 4557 4558 XXX: It would be nice to able to avoid Python function call overhead, by 4559 using directly the C version of find_class(), when find_class() is not 4560 overridden by a subclass. Although, this could become rather hackish. A 4561 simpler optimization would be to call the C function when self is not a 4562 subclass instance. 
*/ 4563 static PyObject * 4564 find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name) 4565 { 4566 _Py_IDENTIFIER(find_class); 4567 4568 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO", 4569 module_name, global_name); 4570 } 4571 4572 static Py_ssize_t 4573 marker(UnpicklerObject *self) 4574 { 4575 Py_ssize_t mark; 4576 4577 if (self->num_marks < 1) { 4578 PickleState *st = _Pickle_GetGlobalState(); 4579 PyErr_SetString(st->UnpicklingError, "could not find MARK"); 4580 return -1; 4581 } 4582 4583 mark = self->marks[--self->num_marks]; 4584 self->stack->mark_set = self->num_marks != 0; 4585 self->stack->fence = self->num_marks ? 4586 self->marks[self->num_marks - 1] : 0; 4587 return mark; 4588 } 4589 4590 static int 4591 load_none(UnpicklerObject *self) 4592 { 4593 PDATA_APPEND(self->stack, Py_None, -1); 4594 return 0; 4595 } 4596 4597 static int 4598 load_int(UnpicklerObject *self) 4599 { 4600 PyObject *value; 4601 char *endptr, *s; 4602 Py_ssize_t len; 4603 long x; 4604 4605 if ((len = _Unpickler_Readline(self, &s)) < 0) 4606 return -1; 4607 if (len < 2) 4608 return bad_readline(); 4609 4610 errno = 0; 4611 /* XXX: Should the base argument of strtol() be explicitly set to 10? 4612 XXX(avassalotti): Should this uses PyOS_strtol()? */ 4613 x = strtol(s, &endptr, 0); 4614 4615 if (errno || (*endptr != '\n' && *endptr != '\0')) { 4616 /* Hm, maybe we've got something long. Let's try reading 4617 * it as a Python int object. */ 4618 errno = 0; 4619 /* XXX: Same thing about the base here. 
*/ 4620 value = PyLong_FromString(s, NULL, 0); 4621 if (value == NULL) { 4622 PyErr_SetString(PyExc_ValueError, 4623 "could not convert string to int"); 4624 return -1; 4625 } 4626 } 4627 else { 4628 if (len == 3 && (x == 0 || x == 1)) { 4629 if ((value = PyBool_FromLong(x)) == NULL) 4630 return -1; 4631 } 4632 else { 4633 if ((value = PyLong_FromLong(x)) == NULL) 4634 return -1; 4635 } 4636 } 4637 4638 PDATA_PUSH(self->stack, value, -1); 4639 return 0; 4640 } 4641 4642 static int 4643 load_bool(UnpicklerObject *self, PyObject *boolean) 4644 { 4645 assert(boolean == Py_True || boolean == Py_False); 4646 PDATA_APPEND(self->stack, boolean, -1); 4647 return 0; 4648 } 4649 4650 /* s contains x bytes of an unsigned little-endian integer. Return its value 4651 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX. 4652 */ 4653 static Py_ssize_t 4654 calc_binsize(char *bytes, int nbytes) 4655 { 4656 unsigned char *s = (unsigned char *)bytes; 4657 int i; 4658 size_t x = 0; 4659 4660 if (nbytes > (int)sizeof(size_t)) { 4661 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes 4662 * have 64-bit size that can't be represented on 32-bit platform. 4663 */ 4664 for (i = (int)sizeof(size_t); i < nbytes; i++) { 4665 if (s[i]) 4666 return -1; 4667 } 4668 nbytes = (int)sizeof(size_t); 4669 } 4670 for (i = 0; i < nbytes; i++) { 4671 x |= (size_t) s[i] << (8 * i); 4672 } 4673 4674 if (x > PY_SSIZE_T_MAX) 4675 return -1; 4676 else 4677 return (Py_ssize_t) x; 4678 } 4679 4680 /* s contains x bytes of a little-endian integer. Return its value as a 4681 * C int. Obscure: when x is 1 or 2, this is an unsigned little-endian 4682 * int, but when x is 4 it's a signed one. This is a historical source 4683 * of x-platform bugs. 
4684 */ 4685 static long 4686 calc_binint(char *bytes, int nbytes) 4687 { 4688 unsigned char *s = (unsigned char *)bytes; 4689 Py_ssize_t i; 4690 long x = 0; 4691 4692 for (i = 0; i < nbytes; i++) { 4693 x |= (long)s[i] << (8 * i); 4694 } 4695 4696 /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) 4697 * is signed, so on a box with longs bigger than 4 bytes we need 4698 * to extend a BININT's sign bit to the full width. 4699 */ 4700 if (SIZEOF_LONG > 4 && nbytes == 4) { 4701 x |= -(x & (1L << 31)); 4702 } 4703 4704 return x; 4705 } 4706 4707 static int 4708 load_binintx(UnpicklerObject *self, char *s, int size) 4709 { 4710 PyObject *value; 4711 long x; 4712 4713 x = calc_binint(s, size); 4714 4715 if ((value = PyLong_FromLong(x)) == NULL) 4716 return -1; 4717 4718 PDATA_PUSH(self->stack, value, -1); 4719 return 0; 4720 } 4721 4722 static int 4723 load_binint(UnpicklerObject *self) 4724 { 4725 char *s; 4726 4727 if (_Unpickler_Read(self, &s, 4) < 0) 4728 return -1; 4729 4730 return load_binintx(self, s, 4); 4731 } 4732 4733 static int 4734 load_binint1(UnpicklerObject *self) 4735 { 4736 char *s; 4737 4738 if (_Unpickler_Read(self, &s, 1) < 0) 4739 return -1; 4740 4741 return load_binintx(self, s, 1); 4742 } 4743 4744 static int 4745 load_binint2(UnpicklerObject *self) 4746 { 4747 char *s; 4748 4749 if (_Unpickler_Read(self, &s, 2) < 0) 4750 return -1; 4751 4752 return load_binintx(self, s, 2); 4753 } 4754 4755 static int 4756 load_long(UnpicklerObject *self) 4757 { 4758 PyObject *value; 4759 char *s; 4760 Py_ssize_t len; 4761 4762 if ((len = _Unpickler_Readline(self, &s)) < 0) 4763 return -1; 4764 if (len < 2) 4765 return bad_readline(); 4766 4767 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove 4768 the 'L' before calling PyLong_FromString. In order to maintain 4769 compatibility with Python 3.0.0, we don't actually *require* 4770 the 'L' to be present. 
*/ 4771 if (s[len-2] == 'L') 4772 s[len-2] = '\0'; 4773 /* XXX: Should the base argument explicitly set to 10? */ 4774 value = PyLong_FromString(s, NULL, 0); 4775 if (value == NULL) 4776 return -1; 4777 4778 PDATA_PUSH(self->stack, value, -1); 4779 return 0; 4780 } 4781 4782 /* 'size' bytes contain the # of bytes of little-endian 256's-complement 4783 * data following. 4784 */ 4785 static int 4786 load_counted_long(UnpicklerObject *self, int size) 4787 { 4788 PyObject *value; 4789 char *nbytes; 4790 char *pdata; 4791 4792 assert(size == 1 || size == 4); 4793 if (_Unpickler_Read(self, &nbytes, size) < 0) 4794 return -1; 4795 4796 size = calc_binint(nbytes, size); 4797 if (size < 0) { 4798 PickleState *st = _Pickle_GetGlobalState(); 4799 /* Corrupt or hostile pickle -- we never write one like this */ 4800 PyErr_SetString(st->UnpicklingError, 4801 "LONG pickle has negative byte count"); 4802 return -1; 4803 } 4804 4805 if (size == 0) 4806 value = PyLong_FromLong(0L); 4807 else { 4808 /* Read the raw little-endian bytes and convert. 
*/ 4809 if (_Unpickler_Read(self, &pdata, size) < 0) 4810 return -1; 4811 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size, 4812 1 /* little endian */ , 1 /* signed */ ); 4813 } 4814 if (value == NULL) 4815 return -1; 4816 PDATA_PUSH(self->stack, value, -1); 4817 return 0; 4818 } 4819 4820 static int 4821 load_float(UnpicklerObject *self) 4822 { 4823 PyObject *value; 4824 char *endptr, *s; 4825 Py_ssize_t len; 4826 double d; 4827 4828 if ((len = _Unpickler_Readline(self, &s)) < 0) 4829 return -1; 4830 if (len < 2) 4831 return bad_readline(); 4832 4833 errno = 0; 4834 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError); 4835 if (d == -1.0 && PyErr_Occurred()) 4836 return -1; 4837 if ((endptr[0] != '\n') && (endptr[0] != '\0')) { 4838 PyErr_SetString(PyExc_ValueError, "could not convert string to float"); 4839 return -1; 4840 } 4841 value = PyFloat_FromDouble(d); 4842 if (value == NULL) 4843 return -1; 4844 4845 PDATA_PUSH(self->stack, value, -1); 4846 return 0; 4847 } 4848 4849 static int 4850 load_binfloat(UnpicklerObject *self) 4851 { 4852 PyObject *value; 4853 double x; 4854 char *s; 4855 4856 if (_Unpickler_Read(self, &s, 8) < 0) 4857 return -1; 4858 4859 x = _PyFloat_Unpack8((unsigned char *)s, 0); 4860 if (x == -1.0 && PyErr_Occurred()) 4861 return -1; 4862 4863 if ((value = PyFloat_FromDouble(x)) == NULL) 4864 return -1; 4865 4866 PDATA_PUSH(self->stack, value, -1); 4867 return 0; 4868 } 4869 4870 static int 4871 load_string(UnpicklerObject *self) 4872 { 4873 PyObject *bytes; 4874 PyObject *obj; 4875 Py_ssize_t len; 4876 char *s, *p; 4877 4878 if ((len = _Unpickler_Readline(self, &s)) < 0) 4879 return -1; 4880 /* Strip the newline */ 4881 len--; 4882 /* Strip outermost quotes */ 4883 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) { 4884 p = s + 1; 4885 len -= 2; 4886 } 4887 else { 4888 PickleState *st = _Pickle_GetGlobalState(); 4889 PyErr_SetString(st->UnpicklingError, 4890 "the STRING opcode argument must be 
quoted"); 4891 return -1; 4892 } 4893 assert(len >= 0); 4894 4895 /* Use the PyBytes API to decode the string, since that is what is used 4896 to encode, and then coerce the result to Unicode. */ 4897 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL); 4898 if (bytes == NULL) 4899 return -1; 4900 4901 /* Leave the Python 2.x strings as bytes if the *encoding* given to the 4902 Unpickler was 'bytes'. Otherwise, convert them to unicode. */ 4903 if (strcmp(self->encoding, "bytes") == 0) { 4904 obj = bytes; 4905 } 4906 else { 4907 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors); 4908 Py_DECREF(bytes); 4909 if (obj == NULL) { 4910 return -1; 4911 } 4912 } 4913 4914 PDATA_PUSH(self->stack, obj, -1); 4915 return 0; 4916 } 4917 4918 static int 4919 load_counted_binstring(UnpicklerObject *self, int nbytes) 4920 { 4921 PyObject *obj; 4922 Py_ssize_t size; 4923 char *s; 4924 4925 if (_Unpickler_Read(self, &s, nbytes) < 0) 4926 return -1; 4927 4928 size = calc_binsize(s, nbytes); 4929 if (size < 0) { 4930 PickleState *st = _Pickle_GetGlobalState(); 4931 PyErr_Format(st->UnpicklingError, 4932 "BINSTRING exceeds system's maximum size of %zd bytes", 4933 PY_SSIZE_T_MAX); 4934 return -1; 4935 } 4936 4937 if (_Unpickler_Read(self, &s, size) < 0) 4938 return -1; 4939 4940 /* Convert Python 2.x strings to bytes if the *encoding* given to the 4941 Unpickler was 'bytes'. Otherwise, convert them to unicode. 
*/ 4942 if (strcmp(self->encoding, "bytes") == 0) { 4943 obj = PyBytes_FromStringAndSize(s, size); 4944 } 4945 else { 4946 obj = PyUnicode_Decode(s, size, self->encoding, self->errors); 4947 } 4948 if (obj == NULL) { 4949 return -1; 4950 } 4951 4952 PDATA_PUSH(self->stack, obj, -1); 4953 return 0; 4954 } 4955 4956 static int 4957 load_counted_binbytes(UnpicklerObject *self, int nbytes) 4958 { 4959 PyObject *bytes; 4960 Py_ssize_t size; 4961 char *s; 4962 4963 if (_Unpickler_Read(self, &s, nbytes) < 0) 4964 return -1; 4965 4966 size = calc_binsize(s, nbytes); 4967 if (size < 0) { 4968 PyErr_Format(PyExc_OverflowError, 4969 "BINBYTES exceeds system's maximum size of %zd bytes", 4970 PY_SSIZE_T_MAX); 4971 return -1; 4972 } 4973 4974 if (_Unpickler_Read(self, &s, size) < 0) 4975 return -1; 4976 4977 bytes = PyBytes_FromStringAndSize(s, size); 4978 if (bytes == NULL) 4979 return -1; 4980 4981 PDATA_PUSH(self->stack, bytes, -1); 4982 return 0; 4983 } 4984 4985 static int 4986 load_unicode(UnpicklerObject *self) 4987 { 4988 PyObject *str; 4989 Py_ssize_t len; 4990 char *s; 4991 4992 if ((len = _Unpickler_Readline(self, &s)) < 0) 4993 return -1; 4994 if (len < 1) 4995 return bad_readline(); 4996 4997 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL); 4998 if (str == NULL) 4999 return -1; 5000 5001 PDATA_PUSH(self->stack, str, -1); 5002 return 0; 5003 } 5004 5005 static int 5006 load_counted_binunicode(UnpicklerObject *self, int nbytes) 5007 { 5008 PyObject *str; 5009 Py_ssize_t size; 5010 char *s; 5011 5012 if (_Unpickler_Read(self, &s, nbytes) < 0) 5013 return -1; 5014 5015 size = calc_binsize(s, nbytes); 5016 if (size < 0) { 5017 PyErr_Format(PyExc_OverflowError, 5018 "BINUNICODE exceeds system's maximum size of %zd bytes", 5019 PY_SSIZE_T_MAX); 5020 return -1; 5021 } 5022 5023 if (_Unpickler_Read(self, &s, size) < 0) 5024 return -1; 5025 5026 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass"); 5027 if (str == NULL) 5028 return -1; 5029 5030 
PDATA_PUSH(self->stack, str, -1); 5031 return 0; 5032 } 5033 5034 static int 5035 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len) 5036 { 5037 PyObject *tuple; 5038 5039 if (Py_SIZE(self->stack) < len) 5040 return Pdata_stack_underflow(self->stack); 5041 5042 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len); 5043 if (tuple == NULL) 5044 return -1; 5045 PDATA_PUSH(self->stack, tuple, -1); 5046 return 0; 5047 } 5048 5049 static int 5050 load_tuple(UnpicklerObject *self) 5051 { 5052 Py_ssize_t i; 5053 5054 if ((i = marker(self)) < 0) 5055 return -1; 5056 5057 return load_counted_tuple(self, Py_SIZE(self->stack) - i); 5058 } 5059 5060 static int 5061 load_empty_list(UnpicklerObject *self) 5062 { 5063 PyObject *list; 5064 5065 if ((list = PyList_New(0)) == NULL) 5066 return -1; 5067 PDATA_PUSH(self->stack, list, -1); 5068 return 0; 5069 } 5070 5071 static int 5072 load_empty_dict(UnpicklerObject *self) 5073 { 5074 PyObject *dict; 5075 5076 if ((dict = PyDict_New()) == NULL) 5077 return -1; 5078 PDATA_PUSH(self->stack, dict, -1); 5079 return 0; 5080 } 5081 5082 static int 5083 load_empty_set(UnpicklerObject *self) 5084 { 5085 PyObject *set; 5086 5087 if ((set = PySet_New(NULL)) == NULL) 5088 return -1; 5089 PDATA_PUSH(self->stack, set, -1); 5090 return 0; 5091 } 5092 5093 static int 5094 load_list(UnpicklerObject *self) 5095 { 5096 PyObject *list; 5097 Py_ssize_t i; 5098 5099 if ((i = marker(self)) < 0) 5100 return -1; 5101 5102 list = Pdata_poplist(self->stack, i); 5103 if (list == NULL) 5104 return -1; 5105 PDATA_PUSH(self->stack, list, -1); 5106 return 0; 5107 } 5108 5109 static int 5110 load_dict(UnpicklerObject *self) 5111 { 5112 PyObject *dict, *key, *value; 5113 Py_ssize_t i, j, k; 5114 5115 if ((i = marker(self)) < 0) 5116 return -1; 5117 j = Py_SIZE(self->stack); 5118 5119 if ((dict = PyDict_New()) == NULL) 5120 return -1; 5121 5122 if ((j - i) % 2 != 0) { 5123 PickleState *st = _Pickle_GetGlobalState(); 5124 
PyErr_SetString(st->UnpicklingError, "odd number of items for DICT"); 5125 Py_DECREF(dict); 5126 return -1; 5127 } 5128 5129 for (k = i + 1; k < j; k += 2) { 5130 key = self->stack->data[k - 1]; 5131 value = self->stack->data[k]; 5132 if (PyDict_SetItem(dict, key, value) < 0) { 5133 Py_DECREF(dict); 5134 return -1; 5135 } 5136 } 5137 Pdata_clear(self->stack, i); 5138 PDATA_PUSH(self->stack, dict, -1); 5139 return 0; 5140 } 5141 5142 static int 5143 load_frozenset(UnpicklerObject *self) 5144 { 5145 PyObject *items; 5146 PyObject *frozenset; 5147 Py_ssize_t i; 5148 5149 if ((i = marker(self)) < 0) 5150 return -1; 5151 5152 items = Pdata_poptuple(self->stack, i); 5153 if (items == NULL) 5154 return -1; 5155 5156 frozenset = PyFrozenSet_New(items); 5157 Py_DECREF(items); 5158 if (frozenset == NULL) 5159 return -1; 5160 5161 PDATA_PUSH(self->stack, frozenset, -1); 5162 return 0; 5163 } 5164 5165 static PyObject * 5166 instantiate(PyObject *cls, PyObject *args) 5167 { 5168 PyObject *result = NULL; 5169 _Py_IDENTIFIER(__getinitargs__); 5170 /* Caller must assure args are a tuple. Normally, args come from 5171 Pdata_poptuple which packs objects from the top of the stack 5172 into a newly created tuple. 
*/ 5173 assert(PyTuple_Check(args)); 5174 if (Py_SIZE(args) > 0 || !PyType_Check(cls) || 5175 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) { 5176 result = PyObject_CallObject(cls, args); 5177 } 5178 else { 5179 _Py_IDENTIFIER(__new__); 5180 5181 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls); 5182 } 5183 return result; 5184 } 5185 5186 static int 5187 load_obj(UnpicklerObject *self) 5188 { 5189 PyObject *cls, *args, *obj = NULL; 5190 Py_ssize_t i; 5191 5192 if ((i = marker(self)) < 0) 5193 return -1; 5194 5195 if (Py_SIZE(self->stack) - i < 1) 5196 return Pdata_stack_underflow(self->stack); 5197 5198 args = Pdata_poptuple(self->stack, i + 1); 5199 if (args == NULL) 5200 return -1; 5201 5202 PDATA_POP(self->stack, cls); 5203 if (cls) { 5204 obj = instantiate(cls, args); 5205 Py_DECREF(cls); 5206 } 5207 Py_DECREF(args); 5208 if (obj == NULL) 5209 return -1; 5210 5211 PDATA_PUSH(self->stack, obj, -1); 5212 return 0; 5213 } 5214 5215 static int 5216 load_inst(UnpicklerObject *self) 5217 { 5218 PyObject *cls = NULL; 5219 PyObject *args = NULL; 5220 PyObject *obj = NULL; 5221 PyObject *module_name; 5222 PyObject *class_name; 5223 Py_ssize_t len; 5224 Py_ssize_t i; 5225 char *s; 5226 5227 if ((i = marker(self)) < 0) 5228 return -1; 5229 if ((len = _Unpickler_Readline(self, &s)) < 0) 5230 return -1; 5231 if (len < 2) 5232 return bad_readline(); 5233 5234 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII 5235 identifiers are permitted in Python 3.0, since the INST opcode is only 5236 supported by older protocols on Python 2.x. 
*/ 5237 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict"); 5238 if (module_name == NULL) 5239 return -1; 5240 5241 if ((len = _Unpickler_Readline(self, &s)) >= 0) { 5242 if (len < 2) { 5243 Py_DECREF(module_name); 5244 return bad_readline(); 5245 } 5246 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict"); 5247 if (class_name != NULL) { 5248 cls = find_class(self, module_name, class_name); 5249 Py_DECREF(class_name); 5250 } 5251 } 5252 Py_DECREF(module_name); 5253 5254 if (cls == NULL) 5255 return -1; 5256 5257 if ((args = Pdata_poptuple(self->stack, i)) != NULL) { 5258 obj = instantiate(cls, args); 5259 Py_DECREF(args); 5260 } 5261 Py_DECREF(cls); 5262 5263 if (obj == NULL) 5264 return -1; 5265 5266 PDATA_PUSH(self->stack, obj, -1); 5267 return 0; 5268 } 5269 5270 static int 5271 load_newobj(UnpicklerObject *self) 5272 { 5273 PyObject *args = NULL; 5274 PyObject *clsraw = NULL; 5275 PyTypeObject *cls; /* clsraw cast to its true type */ 5276 PyObject *obj; 5277 PickleState *st = _Pickle_GetGlobalState(); 5278 5279 /* Stack is ... cls argtuple, and we want to call 5280 * cls.__new__(cls, *argtuple). 5281 */ 5282 PDATA_POP(self->stack, args); 5283 if (args == NULL) 5284 goto error; 5285 if (!PyTuple_Check(args)) { 5286 PyErr_SetString(st->UnpicklingError, 5287 "NEWOBJ expected an arg " "tuple."); 5288 goto error; 5289 } 5290 5291 PDATA_POP(self->stack, clsraw); 5292 cls = (PyTypeObject *)clsraw; 5293 if (cls == NULL) 5294 goto error; 5295 if (!PyType_Check(cls)) { 5296 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument " 5297 "isn't a type object"); 5298 goto error; 5299 } 5300 if (cls->tp_new == NULL) { 5301 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument " 5302 "has NULL tp_new"); 5303 goto error; 5304 } 5305 5306 /* Call __new__. 
*/ 5307 obj = cls->tp_new(cls, args, NULL); 5308 if (obj == NULL) 5309 goto error; 5310 5311 Py_DECREF(args); 5312 Py_DECREF(clsraw); 5313 PDATA_PUSH(self->stack, obj, -1); 5314 return 0; 5315 5316 error: 5317 Py_XDECREF(args); 5318 Py_XDECREF(clsraw); 5319 return -1; 5320 } 5321 5322 static int 5323 load_newobj_ex(UnpicklerObject *self) 5324 { 5325 PyObject *cls, *args, *kwargs; 5326 PyObject *obj; 5327 PickleState *st = _Pickle_GetGlobalState(); 5328 5329 PDATA_POP(self->stack, kwargs); 5330 if (kwargs == NULL) { 5331 return -1; 5332 } 5333 PDATA_POP(self->stack, args); 5334 if (args == NULL) { 5335 Py_DECREF(kwargs); 5336 return -1; 5337 } 5338 PDATA_POP(self->stack, cls); 5339 if (cls == NULL) { 5340 Py_DECREF(kwargs); 5341 Py_DECREF(args); 5342 return -1; 5343 } 5344 5345 if (!PyType_Check(cls)) { 5346 Py_DECREF(kwargs); 5347 Py_DECREF(args); 5348 PyErr_Format(st->UnpicklingError, 5349 "NEWOBJ_EX class argument must be a type, not %.200s", 5350 Py_TYPE(cls)->tp_name); 5351 Py_DECREF(cls); 5352 return -1; 5353 } 5354 5355 if (((PyTypeObject *)cls)->tp_new == NULL) { 5356 Py_DECREF(kwargs); 5357 Py_DECREF(args); 5358 Py_DECREF(cls); 5359 PyErr_SetString(st->UnpicklingError, 5360 "NEWOBJ_EX class argument doesn't have __new__"); 5361 return -1; 5362 } 5363 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs); 5364 Py_DECREF(kwargs); 5365 Py_DECREF(args); 5366 Py_DECREF(cls); 5367 if (obj == NULL) { 5368 return -1; 5369 } 5370 PDATA_PUSH(self->stack, obj, -1); 5371 return 0; 5372 } 5373 5374 static int 5375 load_global(UnpicklerObject *self) 5376 { 5377 PyObject *global = NULL; 5378 PyObject *module_name; 5379 PyObject *global_name; 5380 Py_ssize_t len; 5381 char *s; 5382 5383 if ((len = _Unpickler_Readline(self, &s)) < 0) 5384 return -1; 5385 if (len < 2) 5386 return bad_readline(); 5387 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict"); 5388 if (!module_name) 5389 return -1; 5390 5391 if ((len = _Unpickler_Readline(self, &s)) >= 0) { 
5392 if (len < 2) { 5393 Py_DECREF(module_name); 5394 return bad_readline(); 5395 } 5396 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict"); 5397 if (global_name) { 5398 global = find_class(self, module_name, global_name); 5399 Py_DECREF(global_name); 5400 } 5401 } 5402 Py_DECREF(module_name); 5403 5404 if (global == NULL) 5405 return -1; 5406 PDATA_PUSH(self->stack, global, -1); 5407 return 0; 5408 } 5409 5410 static int 5411 load_stack_global(UnpicklerObject *self) 5412 { 5413 PyObject *global; 5414 PyObject *module_name; 5415 PyObject *global_name; 5416 5417 PDATA_POP(self->stack, global_name); 5418 PDATA_POP(self->stack, module_name); 5419 if (module_name == NULL || !PyUnicode_CheckExact(module_name) || 5420 global_name == NULL || !PyUnicode_CheckExact(global_name)) { 5421 PickleState *st = _Pickle_GetGlobalState(); 5422 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str"); 5423 Py_XDECREF(global_name); 5424 Py_XDECREF(module_name); 5425 return -1; 5426 } 5427 global = find_class(self, module_name, global_name); 5428 Py_DECREF(global_name); 5429 Py_DECREF(module_name); 5430 if (global == NULL) 5431 return -1; 5432 PDATA_PUSH(self->stack, global, -1); 5433 return 0; 5434 } 5435 5436 static int 5437 load_persid(UnpicklerObject *self) 5438 { 5439 PyObject *pid; 5440 Py_ssize_t len; 5441 char *s; 5442 5443 if (self->pers_func) { 5444 if ((len = _Unpickler_Readline(self, &s)) < 0) 5445 return -1; 5446 if (len < 1) 5447 return bad_readline(); 5448 5449 pid = PyUnicode_DecodeASCII(s, len - 1, "strict"); 5450 if (pid == NULL) { 5451 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { 5452 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError, 5453 "persistent IDs in protocol 0 must be " 5454 "ASCII strings"); 5455 } 5456 return -1; 5457 } 5458 5459 /* This does not leak since _Pickle_FastCall() steals the reference 5460 to pid first. 
*/ 5461 pid = _Pickle_FastCall(self->pers_func, pid); 5462 if (pid == NULL) 5463 return -1; 5464 5465 PDATA_PUSH(self->stack, pid, -1); 5466 return 0; 5467 } 5468 else { 5469 PickleState *st = _Pickle_GetGlobalState(); 5470 PyErr_SetString(st->UnpicklingError, 5471 "A load persistent id instruction was encountered,\n" 5472 "but no persistent_load function was specified."); 5473 return -1; 5474 } 5475 } 5476 5477 static int 5478 load_binpersid(UnpicklerObject *self) 5479 { 5480 PyObject *pid; 5481 5482 if (self->pers_func) { 5483 PDATA_POP(self->stack, pid); 5484 if (pid == NULL) 5485 return -1; 5486 5487 /* This does not leak since _Pickle_FastCall() steals the 5488 reference to pid first. */ 5489 pid = _Pickle_FastCall(self->pers_func, pid); 5490 if (pid == NULL) 5491 return -1; 5492 5493 PDATA_PUSH(self->stack, pid, -1); 5494 return 0; 5495 } 5496 else { 5497 PickleState *st = _Pickle_GetGlobalState(); 5498 PyErr_SetString(st->UnpicklingError, 5499 "A load persistent id instruction was encountered,\n" 5500 "but no persistent_load function was specified."); 5501 return -1; 5502 } 5503 } 5504 5505 static int 5506 load_pop(UnpicklerObject *self) 5507 { 5508 Py_ssize_t len = Py_SIZE(self->stack); 5509 5510 /* Note that we split the (pickle.py) stack into two stacks, 5511 * an object stack and a mark stack. We have to be clever and 5512 * pop the right one. We do this by looking at the top of the 5513 * mark stack first, and only signalling a stack underflow if 5514 * the object stack is empty and the mark stack doesn't match 5515 * our expectations. 5516 */ 5517 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) { 5518 self->num_marks--; 5519 self->stack->mark_set = self->num_marks != 0; 5520 self->stack->fence = self->num_marks ? 
5521 self->marks[self->num_marks - 1] : 0; 5522 } else if (len <= self->stack->fence) 5523 return Pdata_stack_underflow(self->stack); 5524 else { 5525 len--; 5526 Py_DECREF(self->stack->data[len]); 5527 Py_SIZE(self->stack) = len; 5528 } 5529 return 0; 5530 } 5531 5532 static int 5533 load_pop_mark(UnpicklerObject *self) 5534 { 5535 Py_ssize_t i; 5536 5537 if ((i = marker(self)) < 0) 5538 return -1; 5539 5540 Pdata_clear(self->stack, i); 5541 5542 return 0; 5543 } 5544 5545 static int 5546 load_dup(UnpicklerObject *self) 5547 { 5548 PyObject *last; 5549 Py_ssize_t len = Py_SIZE(self->stack); 5550 5551 if (len <= self->stack->fence) 5552 return Pdata_stack_underflow(self->stack); 5553 last = self->stack->data[len - 1]; 5554 PDATA_APPEND(self->stack, last, -1); 5555 return 0; 5556 } 5557 5558 static int 5559 load_get(UnpicklerObject *self) 5560 { 5561 PyObject *key, *value; 5562 Py_ssize_t idx; 5563 Py_ssize_t len; 5564 char *s; 5565 5566 if ((len = _Unpickler_Readline(self, &s)) < 0) 5567 return -1; 5568 if (len < 2) 5569 return bad_readline(); 5570 5571 key = PyLong_FromString(s, NULL, 10); 5572 if (key == NULL) 5573 return -1; 5574 idx = PyLong_AsSsize_t(key); 5575 if (idx == -1 && PyErr_Occurred()) { 5576 Py_DECREF(key); 5577 return -1; 5578 } 5579 5580 value = _Unpickler_MemoGet(self, idx); 5581 if (value == NULL) { 5582 if (!PyErr_Occurred()) 5583 PyErr_SetObject(PyExc_KeyError, key); 5584 Py_DECREF(key); 5585 return -1; 5586 } 5587 Py_DECREF(key); 5588 5589 PDATA_APPEND(self->stack, value, -1); 5590 return 0; 5591 } 5592 5593 static int 5594 load_binget(UnpicklerObject *self) 5595 { 5596 PyObject *value; 5597 Py_ssize_t idx; 5598 char *s; 5599 5600 if (_Unpickler_Read(self, &s, 1) < 0) 5601 return -1; 5602 5603 idx = Py_CHARMASK(s[0]); 5604 5605 value = _Unpickler_MemoGet(self, idx); 5606 if (value == NULL) { 5607 PyObject *key = PyLong_FromSsize_t(idx); 5608 if (key != NULL) { 5609 PyErr_SetObject(PyExc_KeyError, key); 5610 Py_DECREF(key); 5611 } 5612 return 
-1; 5613 } 5614 5615 PDATA_APPEND(self->stack, value, -1); 5616 return 0; 5617 } 5618 5619 static int 5620 load_long_binget(UnpicklerObject *self) 5621 { 5622 PyObject *value; 5623 Py_ssize_t idx; 5624 char *s; 5625 5626 if (_Unpickler_Read(self, &s, 4) < 0) 5627 return -1; 5628 5629 idx = calc_binsize(s, 4); 5630 5631 value = _Unpickler_MemoGet(self, idx); 5632 if (value == NULL) { 5633 PyObject *key = PyLong_FromSsize_t(idx); 5634 if (key != NULL) { 5635 PyErr_SetObject(PyExc_KeyError, key); 5636 Py_DECREF(key); 5637 } 5638 return -1; 5639 } 5640 5641 PDATA_APPEND(self->stack, value, -1); 5642 return 0; 5643 } 5644 5645 /* Push an object from the extension registry (EXT[124]). nbytes is 5646 * the number of bytes following the opcode, holding the index (code) value. 5647 */ 5648 static int 5649 load_extension(UnpicklerObject *self, int nbytes) 5650 { 5651 char *codebytes; /* the nbytes bytes after the opcode */ 5652 long code; /* calc_binint returns long */ 5653 PyObject *py_code; /* code as a Python int */ 5654 PyObject *obj; /* the object to push */ 5655 PyObject *pair; /* (module_name, class_name) */ 5656 PyObject *module_name, *class_name; 5657 PickleState *st = _Pickle_GetGlobalState(); 5658 5659 assert(nbytes == 1 || nbytes == 2 || nbytes == 4); 5660 if (_Unpickler_Read(self, &codebytes, nbytes) < 0) 5661 return -1; 5662 code = calc_binint(codebytes, nbytes); 5663 if (code <= 0) { /* note that 0 is forbidden */ 5664 /* Corrupt or hostile pickle. */ 5665 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0"); 5666 return -1; 5667 } 5668 5669 /* Look for the code in the cache. */ 5670 py_code = PyLong_FromLong(code); 5671 if (py_code == NULL) 5672 return -1; 5673 obj = PyDict_GetItemWithError(st->extension_cache, py_code); 5674 if (obj != NULL) { 5675 /* Bingo. 
*/ 5676 Py_DECREF(py_code); 5677 PDATA_APPEND(self->stack, obj, -1); 5678 return 0; 5679 } 5680 if (PyErr_Occurred()) { 5681 Py_DECREF(py_code); 5682 return -1; 5683 } 5684 5685 /* Look up the (module_name, class_name) pair. */ 5686 pair = PyDict_GetItemWithError(st->inverted_registry, py_code); 5687 if (pair == NULL) { 5688 Py_DECREF(py_code); 5689 if (!PyErr_Occurred()) { 5690 PyErr_Format(PyExc_ValueError, "unregistered extension " 5691 "code %ld", code); 5692 } 5693 return -1; 5694 } 5695 /* Since the extension registry is manipulable via Python code, 5696 * confirm that pair is really a 2-tuple of strings. 5697 */ 5698 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 || 5699 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) || 5700 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) { 5701 Py_DECREF(py_code); 5702 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] " 5703 "isn't a 2-tuple of strings", code); 5704 return -1; 5705 } 5706 /* Load the object. */ 5707 obj = find_class(self, module_name, class_name); 5708 if (obj == NULL) { 5709 Py_DECREF(py_code); 5710 return -1; 5711 } 5712 /* Cache code -> obj. 
*/ 5713 code = PyDict_SetItem(st->extension_cache, py_code, obj); 5714 Py_DECREF(py_code); 5715 if (code < 0) { 5716 Py_DECREF(obj); 5717 return -1; 5718 } 5719 PDATA_PUSH(self->stack, obj, -1); 5720 return 0; 5721 } 5722 5723 static int 5724 load_put(UnpicklerObject *self) 5725 { 5726 PyObject *key, *value; 5727 Py_ssize_t idx; 5728 Py_ssize_t len; 5729 char *s; 5730 5731 if ((len = _Unpickler_Readline(self, &s)) < 0) 5732 return -1; 5733 if (len < 2) 5734 return bad_readline(); 5735 if (Py_SIZE(self->stack) <= self->stack->fence) 5736 return Pdata_stack_underflow(self->stack); 5737 value = self->stack->data[Py_SIZE(self->stack) - 1]; 5738 5739 key = PyLong_FromString(s, NULL, 10); 5740 if (key == NULL) 5741 return -1; 5742 idx = PyLong_AsSsize_t(key); 5743 Py_DECREF(key); 5744 if (idx < 0) { 5745 if (!PyErr_Occurred()) 5746 PyErr_SetString(PyExc_ValueError, 5747 "negative PUT argument"); 5748 return -1; 5749 } 5750 5751 return _Unpickler_MemoPut(self, idx, value); 5752 } 5753 5754 static int 5755 load_binput(UnpicklerObject *self) 5756 { 5757 PyObject *value; 5758 Py_ssize_t idx; 5759 char *s; 5760 5761 if (_Unpickler_Read(self, &s, 1) < 0) 5762 return -1; 5763 5764 if (Py_SIZE(self->stack) <= self->stack->fence) 5765 return Pdata_stack_underflow(self->stack); 5766 value = self->stack->data[Py_SIZE(self->stack) - 1]; 5767 5768 idx = Py_CHARMASK(s[0]); 5769 5770 return _Unpickler_MemoPut(self, idx, value); 5771 } 5772 5773 static int 5774 load_long_binput(UnpicklerObject *self) 5775 { 5776 PyObject *value; 5777 Py_ssize_t idx; 5778 char *s; 5779 5780 if (_Unpickler_Read(self, &s, 4) < 0) 5781 return -1; 5782 5783 if (Py_SIZE(self->stack) <= self->stack->fence) 5784 return Pdata_stack_underflow(self->stack); 5785 value = self->stack->data[Py_SIZE(self->stack) - 1]; 5786 5787 idx = calc_binsize(s, 4); 5788 if (idx < 0) { 5789 PyErr_SetString(PyExc_ValueError, 5790 "negative LONG_BINPUT argument"); 5791 return -1; 5792 } 5793 5794 return _Unpickler_MemoPut(self, idx, 
value); 5795 } 5796 5797 static int 5798 load_memoize(UnpicklerObject *self) 5799 { 5800 PyObject *value; 5801 5802 if (Py_SIZE(self->stack) <= self->stack->fence) 5803 return Pdata_stack_underflow(self->stack); 5804 value = self->stack->data[Py_SIZE(self->stack) - 1]; 5805 5806 return _Unpickler_MemoPut(self, self->memo_len, value); 5807 } 5808 5809 static int 5810 do_append(UnpicklerObject *self, Py_ssize_t x) 5811 { 5812 PyObject *value; 5813 PyObject *list; 5814 Py_ssize_t len, i; 5815 5816 len = Py_SIZE(self->stack); 5817 if (x > len || x <= self->stack->fence) 5818 return Pdata_stack_underflow(self->stack); 5819 if (len == x) /* nothing to do */ 5820 return 0; 5821 5822 list = self->stack->data[x - 1]; 5823 5824 if (PyList_Check(list)) { 5825 PyObject *slice; 5826 Py_ssize_t list_len; 5827 int ret; 5828 5829 slice = Pdata_poplist(self->stack, x); 5830 if (!slice) 5831 return -1; 5832 list_len = PyList_GET_SIZE(list); 5833 ret = PyList_SetSlice(list, list_len, list_len, slice); 5834 Py_DECREF(slice); 5835 return ret; 5836 } 5837 else { 5838 PyObject *append_func; 5839 _Py_IDENTIFIER(append); 5840 5841 append_func = _PyObject_GetAttrId(list, &PyId_append); 5842 if (append_func == NULL) 5843 return -1; 5844 for (i = x; i < len; i++) { 5845 PyObject *result; 5846 5847 value = self->stack->data[i]; 5848 result = _Pickle_FastCall(append_func, value); 5849 if (result == NULL) { 5850 Pdata_clear(self->stack, i + 1); 5851 Py_SIZE(self->stack) = x; 5852 Py_DECREF(append_func); 5853 return -1; 5854 } 5855 Py_DECREF(result); 5856 } 5857 Py_SIZE(self->stack) = x; 5858 Py_DECREF(append_func); 5859 } 5860 5861 return 0; 5862 } 5863 5864 static int 5865 load_append(UnpicklerObject *self) 5866 { 5867 if (Py_SIZE(self->stack) - 1 <= self->stack->fence) 5868 return Pdata_stack_underflow(self->stack); 5869 return do_append(self, Py_SIZE(self->stack) - 1); 5870 } 5871 5872 static int 5873 load_appends(UnpicklerObject *self) 5874 { 5875 Py_ssize_t i = marker(self); 5876 if (i < 0) 
5877 return -1; 5878 return do_append(self, i); 5879 } 5880 5881 static int 5882 do_setitems(UnpicklerObject *self, Py_ssize_t x) 5883 { 5884 PyObject *value, *key; 5885 PyObject *dict; 5886 Py_ssize_t len, i; 5887 int status = 0; 5888 5889 len = Py_SIZE(self->stack); 5890 if (x > len || x <= self->stack->fence) 5891 return Pdata_stack_underflow(self->stack); 5892 if (len == x) /* nothing to do */ 5893 return 0; 5894 if ((len - x) % 2 != 0) { 5895 PickleState *st = _Pickle_GetGlobalState(); 5896 /* Currupt or hostile pickle -- we never write one like this. */ 5897 PyErr_SetString(st->UnpicklingError, 5898 "odd number of items for SETITEMS"); 5899 return -1; 5900 } 5901 5902 /* Here, dict does not actually need to be a PyDict; it could be anything 5903 that supports the __setitem__ attribute. */ 5904 dict = self->stack->data[x - 1]; 5905 5906 for (i = x + 1; i < len; i += 2) { 5907 key = self->stack->data[i - 1]; 5908 value = self->stack->data[i]; 5909 if (PyObject_SetItem(dict, key, value) < 0) { 5910 status = -1; 5911 break; 5912 } 5913 } 5914 5915 Pdata_clear(self->stack, x); 5916 return status; 5917 } 5918 5919 static int 5920 load_setitem(UnpicklerObject *self) 5921 { 5922 return do_setitems(self, Py_SIZE(self->stack) - 2); 5923 } 5924 5925 static int 5926 load_setitems(UnpicklerObject *self) 5927 { 5928 Py_ssize_t i = marker(self); 5929 if (i < 0) 5930 return -1; 5931 return do_setitems(self, i); 5932 } 5933 5934 static int 5935 load_additems(UnpicklerObject *self) 5936 { 5937 PyObject *set; 5938 Py_ssize_t mark, len, i; 5939 5940 mark = marker(self); 5941 if (mark < 0) 5942 return -1; 5943 len = Py_SIZE(self->stack); 5944 if (mark > len || mark <= self->stack->fence) 5945 return Pdata_stack_underflow(self->stack); 5946 if (len == mark) /* nothing to do */ 5947 return 0; 5948 5949 set = self->stack->data[mark - 1]; 5950 5951 if (PySet_Check(set)) { 5952 PyObject *items; 5953 int status; 5954 5955 items = Pdata_poptuple(self->stack, mark); 5956 if (items == 
NULL) 5957 return -1; 5958 5959 status = _PySet_Update(set, items); 5960 Py_DECREF(items); 5961 return status; 5962 } 5963 else { 5964 PyObject *add_func; 5965 _Py_IDENTIFIER(add); 5966 5967 add_func = _PyObject_GetAttrId(set, &PyId_add); 5968 if (add_func == NULL) 5969 return -1; 5970 for (i = mark; i < len; i++) { 5971 PyObject *result; 5972 PyObject *item; 5973 5974 item = self->stack->data[i]; 5975 result = _Pickle_FastCall(add_func, item); 5976 if (result == NULL) { 5977 Pdata_clear(self->stack, i + 1); 5978 Py_SIZE(self->stack) = mark; 5979 return -1; 5980 } 5981 Py_DECREF(result); 5982 } 5983 Py_SIZE(self->stack) = mark; 5984 } 5985 5986 return 0; 5987 } 5988 5989 static int 5990 load_build(UnpicklerObject *self) 5991 { 5992 PyObject *state, *inst, *slotstate; 5993 PyObject *setstate; 5994 int status = 0; 5995 _Py_IDENTIFIER(__setstate__); 5996 5997 /* Stack is ... instance, state. We want to leave instance at 5998 * the stack top, possibly mutated via instance.__setstate__(state). 5999 */ 6000 if (Py_SIZE(self->stack) - 2 < self->stack->fence) 6001 return Pdata_stack_underflow(self->stack); 6002 6003 PDATA_POP(self->stack, state); 6004 if (state == NULL) 6005 return -1; 6006 6007 inst = self->stack->data[Py_SIZE(self->stack) - 1]; 6008 6009 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__); 6010 if (setstate == NULL) { 6011 if (PyErr_ExceptionMatches(PyExc_AttributeError)) 6012 PyErr_Clear(); 6013 else { 6014 Py_DECREF(state); 6015 return -1; 6016 } 6017 } 6018 else { 6019 PyObject *result; 6020 6021 /* The explicit __setstate__ is responsible for everything. */ 6022 result = _Pickle_FastCall(setstate, state); 6023 Py_DECREF(setstate); 6024 if (result == NULL) 6025 return -1; 6026 Py_DECREF(result); 6027 return 0; 6028 } 6029 6030 /* A default __setstate__. First see whether state embeds a 6031 * slot state dict too (a proto 2 addition). 
6032 */ 6033 if (PyTuple_Check(state) && Py_SIZE(state) == 2) { 6034 PyObject *tmp = state; 6035 6036 state = PyTuple_GET_ITEM(tmp, 0); 6037 slotstate = PyTuple_GET_ITEM(tmp, 1); 6038 Py_INCREF(state); 6039 Py_INCREF(slotstate); 6040 Py_DECREF(tmp); 6041 } 6042 else 6043 slotstate = NULL; 6044 6045 /* Set inst.__dict__ from the state dict (if any). */ 6046 if (state != Py_None) { 6047 PyObject *dict; 6048 PyObject *d_key, *d_value; 6049 Py_ssize_t i; 6050 _Py_IDENTIFIER(__dict__); 6051 6052 if (!PyDict_Check(state)) { 6053 PickleState *st = _Pickle_GetGlobalState(); 6054 PyErr_SetString(st->UnpicklingError, "state is not a dictionary"); 6055 goto error; 6056 } 6057 dict = _PyObject_GetAttrId(inst, &PyId___dict__); 6058 if (dict == NULL) 6059 goto error; 6060 6061 i = 0; 6062 while (PyDict_Next(state, &i, &d_key, &d_value)) { 6063 /* normally the keys for instance attributes are 6064 interned. we should try to do that here. */ 6065 Py_INCREF(d_key); 6066 if (PyUnicode_CheckExact(d_key)) 6067 PyUnicode_InternInPlace(&d_key); 6068 if (PyObject_SetItem(dict, d_key, d_value) < 0) { 6069 Py_DECREF(d_key); 6070 goto error; 6071 } 6072 Py_DECREF(d_key); 6073 } 6074 Py_DECREF(dict); 6075 } 6076 6077 /* Also set instance attributes from the slotstate dict (if any). 
*/ 6078 if (slotstate != NULL) { 6079 PyObject *d_key, *d_value; 6080 Py_ssize_t i; 6081 6082 if (!PyDict_Check(slotstate)) { 6083 PickleState *st = _Pickle_GetGlobalState(); 6084 PyErr_SetString(st->UnpicklingError, 6085 "slot state is not a dictionary"); 6086 goto error; 6087 } 6088 i = 0; 6089 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) { 6090 if (PyObject_SetAttr(inst, d_key, d_value) < 0) 6091 goto error; 6092 } 6093 } 6094 6095 if (0) { 6096 error: 6097 status = -1; 6098 } 6099 6100 Py_DECREF(state); 6101 Py_XDECREF(slotstate); 6102 return status; 6103 } 6104 6105 static int 6106 load_mark(UnpicklerObject *self) 6107 { 6108 6109 /* Note that we split the (pickle.py) stack into two stacks, an 6110 * object stack and a mark stack. Here we push a mark onto the 6111 * mark stack. 6112 */ 6113 6114 if ((self->num_marks + 1) >= self->marks_size) { 6115 size_t alloc; 6116 6117 /* Use the size_t type to check for overflow. */ 6118 alloc = ((size_t)self->num_marks << 1) + 20; 6119 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) || 6120 alloc <= ((size_t)self->num_marks + 1)) { 6121 PyErr_NoMemory(); 6122 return -1; 6123 } 6124 6125 if (self->marks == NULL) 6126 self->marks = PyMem_NEW(Py_ssize_t, alloc); 6127 else 6128 PyMem_RESIZE(self->marks, Py_ssize_t, alloc); 6129 if (self->marks == NULL) { 6130 self->marks_size = 0; 6131 PyErr_NoMemory(); 6132 return -1; 6133 } 6134 self->marks_size = (Py_ssize_t)alloc; 6135 } 6136 6137 self->stack->mark_set = 1; 6138 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack); 6139 6140 return 0; 6141 } 6142 6143 static int 6144 load_reduce(UnpicklerObject *self) 6145 { 6146 PyObject *callable = NULL; 6147 PyObject *argtup = NULL; 6148 PyObject *obj = NULL; 6149 6150 PDATA_POP(self->stack, argtup); 6151 if (argtup == NULL) 6152 return -1; 6153 PDATA_POP(self->stack, callable); 6154 if (callable) { 6155 obj = PyObject_CallObject(callable, argtup); 6156 Py_DECREF(callable); 6157 } 6158 Py_DECREF(argtup); 
6159 6160 if (obj == NULL) 6161 return -1; 6162 6163 PDATA_PUSH(self->stack, obj, -1); 6164 return 0; 6165 } 6166 6167 /* Just raises an error if we don't know the protocol specified. PROTO 6168 * is the first opcode for protocols >= 2. 6169 */ 6170 static int 6171 load_proto(UnpicklerObject *self) 6172 { 6173 char *s; 6174 int i; 6175 6176 if (_Unpickler_Read(self, &s, 1) < 0) 6177 return -1; 6178 6179 i = (unsigned char)s[0]; 6180 if (i <= HIGHEST_PROTOCOL) { 6181 self->proto = i; 6182 return 0; 6183 } 6184 6185 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i); 6186 return -1; 6187 } 6188 6189 static int 6190 load_frame(UnpicklerObject *self) 6191 { 6192 char *s; 6193 Py_ssize_t frame_len; 6194 6195 if (_Unpickler_Read(self, &s, 8) < 0) 6196 return -1; 6197 6198 frame_len = calc_binsize(s, 8); 6199 if (frame_len < 0) { 6200 PyErr_Format(PyExc_OverflowError, 6201 "FRAME length exceeds system's maximum of %zd bytes", 6202 PY_SSIZE_T_MAX); 6203 return -1; 6204 } 6205 6206 if (_Unpickler_Read(self, &s, frame_len) < 0) 6207 return -1; 6208 6209 /* Rewind to start of frame */ 6210 self->next_read_idx -= frame_len; 6211 return 0; 6212 } 6213 6214 static PyObject * 6215 load(UnpicklerObject *self) 6216 { 6217 PyObject *value = NULL; 6218 char *s = NULL; 6219 6220 self->num_marks = 0; 6221 self->stack->mark_set = 0; 6222 self->stack->fence = 0; 6223 self->proto = 0; 6224 if (Py_SIZE(self->stack)) 6225 Pdata_clear(self->stack, 0); 6226 6227 /* Convenient macros for the dispatch while-switch loop just below. 
*/ 6228 #define OP(opcode, load_func) \ 6229 case opcode: if (load_func(self) < 0) break; continue; 6230 6231 #define OP_ARG(opcode, load_func, arg) \ 6232 case opcode: if (load_func(self, (arg)) < 0) break; continue; 6233 6234 while (1) { 6235 if (_Unpickler_Read(self, &s, 1) < 0) { 6236 PickleState *st = _Pickle_GetGlobalState(); 6237 if (PyErr_ExceptionMatches(st->UnpicklingError)) { 6238 PyErr_Format(PyExc_EOFError, "Ran out of input"); 6239 } 6240 return NULL; 6241 } 6242 6243 switch ((enum opcode)s[0]) { 6244 OP(NONE, load_none) 6245 OP(BININT, load_binint) 6246 OP(BININT1, load_binint1) 6247 OP(BININT2, load_binint2) 6248 OP(INT, load_int) 6249 OP(LONG, load_long) 6250 OP_ARG(LONG1, load_counted_long, 1) 6251 OP_ARG(LONG4, load_counted_long, 4) 6252 OP(FLOAT, load_float) 6253 OP(BINFLOAT, load_binfloat) 6254 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1) 6255 OP_ARG(BINBYTES, load_counted_binbytes, 4) 6256 OP_ARG(BINBYTES8, load_counted_binbytes, 8) 6257 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1) 6258 OP_ARG(BINSTRING, load_counted_binstring, 4) 6259 OP(STRING, load_string) 6260 OP(UNICODE, load_unicode) 6261 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1) 6262 OP_ARG(BINUNICODE, load_counted_binunicode, 4) 6263 OP_ARG(BINUNICODE8, load_counted_binunicode, 8) 6264 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0) 6265 OP_ARG(TUPLE1, load_counted_tuple, 1) 6266 OP_ARG(TUPLE2, load_counted_tuple, 2) 6267 OP_ARG(TUPLE3, load_counted_tuple, 3) 6268 OP(TUPLE, load_tuple) 6269 OP(EMPTY_LIST, load_empty_list) 6270 OP(LIST, load_list) 6271 OP(EMPTY_DICT, load_empty_dict) 6272 OP(DICT, load_dict) 6273 OP(EMPTY_SET, load_empty_set) 6274 OP(ADDITEMS, load_additems) 6275 OP(FROZENSET, load_frozenset) 6276 OP(OBJ, load_obj) 6277 OP(INST, load_inst) 6278 OP(NEWOBJ, load_newobj) 6279 OP(NEWOBJ_EX, load_newobj_ex) 6280 OP(GLOBAL, load_global) 6281 OP(STACK_GLOBAL, load_stack_global) 6282 OP(APPEND, load_append) 6283 OP(APPENDS, load_appends) 6284 OP(BUILD, 
load_build) 6285 OP(DUP, load_dup) 6286 OP(BINGET, load_binget) 6287 OP(LONG_BINGET, load_long_binget) 6288 OP(GET, load_get) 6289 OP(MARK, load_mark) 6290 OP(BINPUT, load_binput) 6291 OP(LONG_BINPUT, load_long_binput) 6292 OP(PUT, load_put) 6293 OP(MEMOIZE, load_memoize) 6294 OP(POP, load_pop) 6295 OP(POP_MARK, load_pop_mark) 6296 OP(SETITEM, load_setitem) 6297 OP(SETITEMS, load_setitems) 6298 OP(PERSID, load_persid) 6299 OP(BINPERSID, load_binpersid) 6300 OP(REDUCE, load_reduce) 6301 OP(PROTO, load_proto) 6302 OP(FRAME, load_frame) 6303 OP_ARG(EXT1, load_extension, 1) 6304 OP_ARG(EXT2, load_extension, 2) 6305 OP_ARG(EXT4, load_extension, 4) 6306 OP_ARG(NEWTRUE, load_bool, Py_True) 6307 OP_ARG(NEWFALSE, load_bool, Py_False) 6308 6309 case STOP: 6310 break; 6311 6312 default: 6313 { 6314 PickleState *st = _Pickle_GetGlobalState(); 6315 unsigned char c = (unsigned char) *s; 6316 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') { 6317 PyErr_Format(st->UnpicklingError, 6318 "invalid load key, '%c'.", c); 6319 } 6320 else { 6321 PyErr_Format(st->UnpicklingError, 6322 "invalid load key, '\\x%02x'.", c); 6323 } 6324 return NULL; 6325 } 6326 } 6327 6328 break; /* and we are done! */ 6329 } 6330 6331 if (PyErr_Occurred()) { 6332 return NULL; 6333 } 6334 6335 if (_Unpickler_SkipConsumed(self) < 0) 6336 return NULL; 6337 6338 PDATA_POP(self->stack, value); 6339 return value; 6340 } 6341 6342 /*[clinic input] 6343 6344 _pickle.Unpickler.load 6345 6346 Load a pickle. 6347 6348 Read a pickled object representation from the open file object given 6349 in the constructor, and return the reconstituted object hierarchy 6350 specified therein. 6351 [clinic start generated code]*/ 6352 6353 static PyObject * 6354 _pickle_Unpickler_load_impl(UnpicklerObject *self) 6355 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/ 6356 { 6357 UnpicklerObject *unpickler = (UnpicklerObject*)self; 6358 6359 /* Check whether the Unpickler was initialized correctly. 
This prevents 6360 segfaulting if a subclass overridden __init__ with a function that does 6361 not call Unpickler.__init__(). Here, we simply ensure that self->read 6362 is not NULL. */ 6363 if (unpickler->read == NULL) { 6364 PickleState *st = _Pickle_GetGlobalState(); 6365 PyErr_Format(st->UnpicklingError, 6366 "Unpickler.__init__() was not called by %s.__init__()", 6367 Py_TYPE(unpickler)->tp_name); 6368 return NULL; 6369 } 6370 6371 return load(unpickler); 6372 } 6373 6374 /* The name of find_class() is misleading. In newer pickle protocols, this 6375 function is used for loading any global (i.e., functions), not just 6376 classes. The name is kept only for backward compatibility. */ 6377 6378 /*[clinic input] 6379 6380 _pickle.Unpickler.find_class 6381 6382 module_name: object 6383 global_name: object 6384 / 6385 6386 Return an object from a specified module. 6387 6388 If necessary, the module will be imported. Subclasses may override 6389 this method (e.g. to restrict unpickling of arbitrary classes and 6390 functions). 6391 6392 This method is called whenever a class or a function object is 6393 needed. Both arguments passed are str objects. 6394 [clinic start generated code]*/ 6395 6396 static PyObject * 6397 _pickle_Unpickler_find_class_impl(UnpicklerObject *self, 6398 PyObject *module_name, 6399 PyObject *global_name) 6400 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/ 6401 { 6402 PyObject *global; 6403 PyObject *modules_dict; 6404 PyObject *module; 6405 _Py_IDENTIFIER(modules); 6406 6407 /* Try to map the old names used in Python 2.x to the new ones used in 6408 Python 3.x. We do this only with old pickle protocols and when the 6409 user has not disabled the feature. */ 6410 if (self->proto < 3 && self->fix_imports) { 6411 PyObject *key; 6412 PyObject *item; 6413 PickleState *st = _Pickle_GetGlobalState(); 6414 6415 /* Check if the global (i.e., a function or a class) was renamed 6416 or moved to another module. 
*/ 6417 key = PyTuple_Pack(2, module_name, global_name); 6418 if (key == NULL) 6419 return NULL; 6420 item = PyDict_GetItemWithError(st->name_mapping_2to3, key); 6421 Py_DECREF(key); 6422 if (item) { 6423 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { 6424 PyErr_Format(PyExc_RuntimeError, 6425 "_compat_pickle.NAME_MAPPING values should be " 6426 "2-tuples, not %.200s", Py_TYPE(item)->tp_name); 6427 return NULL; 6428 } 6429 module_name = PyTuple_GET_ITEM(item, 0); 6430 global_name = PyTuple_GET_ITEM(item, 1); 6431 if (!PyUnicode_Check(module_name) || 6432 !PyUnicode_Check(global_name)) { 6433 PyErr_Format(PyExc_RuntimeError, 6434 "_compat_pickle.NAME_MAPPING values should be " 6435 "pairs of str, not (%.200s, %.200s)", 6436 Py_TYPE(module_name)->tp_name, 6437 Py_TYPE(global_name)->tp_name); 6438 return NULL; 6439 } 6440 } 6441 else if (PyErr_Occurred()) { 6442 return NULL; 6443 } 6444 else { 6445 /* Check if the module was renamed. */ 6446 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name); 6447 if (item) { 6448 if (!PyUnicode_Check(item)) { 6449 PyErr_Format(PyExc_RuntimeError, 6450 "_compat_pickle.IMPORT_MAPPING values should be " 6451 "strings, not %.200s", Py_TYPE(item)->tp_name); 6452 return NULL; 6453 } 6454 module_name = item; 6455 } 6456 else if (PyErr_Occurred()) { 6457 return NULL; 6458 } 6459 } 6460 } 6461 6462 modules_dict = _PySys_GetObjectId(&PyId_modules); 6463 if (modules_dict == NULL) { 6464 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules"); 6465 return NULL; 6466 } 6467 6468 module = PyDict_GetItemWithError(modules_dict, module_name); 6469 if (module == NULL) { 6470 if (PyErr_Occurred()) 6471 return NULL; 6472 module = PyImport_Import(module_name); 6473 if (module == NULL) 6474 return NULL; 6475 global = getattribute(module, global_name, self->proto >= 4); 6476 Py_DECREF(module); 6477 } 6478 else { 6479 global = getattribute(module, global_name, self->proto >= 4); 6480 } 6481 return global; 6482 } 6483 
6484 /*[clinic input] 6485 6486 _pickle.Unpickler.__sizeof__ -> Py_ssize_t 6487 6488 Returns size in memory, in bytes. 6489 [clinic start generated code]*/ 6490 6491 static Py_ssize_t 6492 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self) 6493 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/ 6494 { 6495 Py_ssize_t res; 6496 6497 res = _PyObject_SIZE(Py_TYPE(self)); 6498 if (self->memo != NULL) 6499 res += self->memo_size * sizeof(PyObject *); 6500 if (self->marks != NULL) 6501 res += self->marks_size * sizeof(Py_ssize_t); 6502 if (self->input_line != NULL) 6503 res += strlen(self->input_line) + 1; 6504 if (self->encoding != NULL) 6505 res += strlen(self->encoding) + 1; 6506 if (self->errors != NULL) 6507 res += strlen(self->errors) + 1; 6508 return res; 6509 } 6510 6511 static struct PyMethodDef Unpickler_methods[] = { 6512 _PICKLE_UNPICKLER_LOAD_METHODDEF 6513 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF 6514 _PICKLE_UNPICKLER___SIZEOF___METHODDEF 6515 {NULL, NULL} /* sentinel */ 6516 }; 6517 6518 static void 6519 Unpickler_dealloc(UnpicklerObject *self) 6520 { 6521 PyObject_GC_UnTrack((PyObject *)self); 6522 Py_XDECREF(self->readline); 6523 Py_XDECREF(self->read); 6524 Py_XDECREF(self->peek); 6525 Py_XDECREF(self->stack); 6526 Py_XDECREF(self->pers_func); 6527 if (self->buffer.buf != NULL) { 6528 PyBuffer_Release(&self->buffer); 6529 self->buffer.buf = NULL; 6530 } 6531 6532 _Unpickler_MemoCleanup(self); 6533 PyMem_Free(self->marks); 6534 PyMem_Free(self->input_line); 6535 PyMem_Free(self->encoding); 6536 PyMem_Free(self->errors); 6537 6538 Py_TYPE(self)->tp_free((PyObject *)self); 6539 } 6540 6541 static int 6542 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg) 6543 { 6544 Py_VISIT(self->readline); 6545 Py_VISIT(self->read); 6546 Py_VISIT(self->peek); 6547 Py_VISIT(self->stack); 6548 Py_VISIT(self->pers_func); 6549 return 0; 6550 } 6551 6552 static int 6553 Unpickler_clear(UnpicklerObject *self) 6554 { 6555 
Py_CLEAR(self->readline); 6556 Py_CLEAR(self->read); 6557 Py_CLEAR(self->peek); 6558 Py_CLEAR(self->stack); 6559 Py_CLEAR(self->pers_func); 6560 if (self->buffer.buf != NULL) { 6561 PyBuffer_Release(&self->buffer); 6562 self->buffer.buf = NULL; 6563 } 6564 6565 _Unpickler_MemoCleanup(self); 6566 PyMem_Free(self->marks); 6567 self->marks = NULL; 6568 PyMem_Free(self->input_line); 6569 self->input_line = NULL; 6570 PyMem_Free(self->encoding); 6571 self->encoding = NULL; 6572 PyMem_Free(self->errors); 6573 self->errors = NULL; 6574 6575 return 0; 6576 } 6577 6578 /*[clinic input] 6579 6580 _pickle.Unpickler.__init__ 6581 6582 file: object 6583 * 6584 fix_imports: bool = True 6585 encoding: str = 'ASCII' 6586 errors: str = 'strict' 6587 6588 This takes a binary file for reading a pickle data stream. 6589 6590 The protocol version of the pickle is detected automatically, so no 6591 protocol argument is needed. Bytes past the pickled object's 6592 representation are ignored. 6593 6594 The argument *file* must have two methods, a read() method that takes 6595 an integer argument, and a readline() method that requires no 6596 arguments. Both methods should return bytes. Thus *file* can be a 6597 binary file object opened for reading, an io.BytesIO object, or any 6598 other custom object that meets this interface. 6599 6600 Optional keyword arguments are *fix_imports*, *encoding* and *errors*, 6601 which are used to control compatibility support for pickle stream 6602 generated by Python 2. If *fix_imports* is True, pickle will try to 6603 map the old Python 2 names to the new names used in Python 3. The 6604 *encoding* and *errors* tell pickle how to decode 8-bit string 6605 instances pickled by Python 2; these default to 'ASCII' and 'strict', 6606 respectively. The *encoding* can be 'bytes' to read these 8-bit 6607 string instances as bytes objects. 
6608 [clinic start generated code]*/ 6609 6610 static int 6611 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file, 6612 int fix_imports, const char *encoding, 6613 const char *errors) 6614 /*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/ 6615 { 6616 _Py_IDENTIFIER(persistent_load); 6617 6618 /* In case of multiple __init__() calls, clear previous content. */ 6619 if (self->read != NULL) 6620 (void)Unpickler_clear(self); 6621 6622 if (_Unpickler_SetInputStream(self, file) < 0) 6623 return -1; 6624 6625 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0) 6626 return -1; 6627 6628 self->fix_imports = fix_imports; 6629 if (self->fix_imports == -1) 6630 return -1; 6631 6632 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) { 6633 self->pers_func = _PyObject_GetAttrId((PyObject *)self, 6634 &PyId_persistent_load); 6635 if (self->pers_func == NULL) 6636 return 1; 6637 } 6638 else { 6639 self->pers_func = NULL; 6640 } 6641 6642 self->stack = (Pdata *)Pdata_New(); 6643 if (self->stack == NULL) 6644 return 1; 6645 6646 self->memo_size = 32; 6647 self->memo = _Unpickler_NewMemo(self->memo_size); 6648 if (self->memo == NULL) 6649 return -1; 6650 6651 self->proto = 0; 6652 6653 return 0; 6654 } 6655 6656 6657 /* Define a proxy object for the Unpickler's internal memo object. This is to 6658 * avoid breaking code like: 6659 * unpickler.memo.clear() 6660 * and 6661 * unpickler.memo = saved_memo 6662 * Is this a good idea? Not really, but we don't want to break code that uses 6663 * it. Note that we don't implement the entire mapping API here. This is 6664 * intentional, as these should be treated as black-box implementation details. 6665 * 6666 * We do, however, have to implement pickling/unpickling support because of 6667 * real-world code like cvs2svn. 6668 */ 6669 6670 /*[clinic input] 6671 _pickle.UnpicklerMemoProxy.clear 6672 6673 Remove all items from memo. 
6674 [clinic start generated code]*/ 6675 6676 static PyObject * 6677 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self) 6678 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/ 6679 { 6680 _Unpickler_MemoCleanup(self->unpickler); 6681 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size); 6682 if (self->unpickler->memo == NULL) 6683 return NULL; 6684 Py_RETURN_NONE; 6685 } 6686 6687 /*[clinic input] 6688 _pickle.UnpicklerMemoProxy.copy 6689 6690 Copy the memo to a new object. 6691 [clinic start generated code]*/ 6692 6693 static PyObject * 6694 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self) 6695 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/ 6696 { 6697 Py_ssize_t i; 6698 PyObject *new_memo = PyDict_New(); 6699 if (new_memo == NULL) 6700 return NULL; 6701 6702 for (i = 0; i < self->unpickler->memo_size; i++) { 6703 int status; 6704 PyObject *key, *value; 6705 6706 value = self->unpickler->memo[i]; 6707 if (value == NULL) 6708 continue; 6709 6710 key = PyLong_FromSsize_t(i); 6711 if (key == NULL) 6712 goto error; 6713 status = PyDict_SetItem(new_memo, key, value); 6714 Py_DECREF(key); 6715 if (status < 0) 6716 goto error; 6717 } 6718 return new_memo; 6719 6720 error: 6721 Py_DECREF(new_memo); 6722 return NULL; 6723 } 6724 6725 /*[clinic input] 6726 _pickle.UnpicklerMemoProxy.__reduce__ 6727 6728 Implement pickling support. 
6729 [clinic start generated code]*/ 6730 6731 static PyObject * 6732 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self) 6733 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/ 6734 { 6735 PyObject *reduce_value; 6736 PyObject *constructor_args; 6737 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self); 6738 if (contents == NULL) 6739 return NULL; 6740 6741 reduce_value = PyTuple_New(2); 6742 if (reduce_value == NULL) { 6743 Py_DECREF(contents); 6744 return NULL; 6745 } 6746 constructor_args = PyTuple_New(1); 6747 if (constructor_args == NULL) { 6748 Py_DECREF(contents); 6749 Py_DECREF(reduce_value); 6750 return NULL; 6751 } 6752 PyTuple_SET_ITEM(constructor_args, 0, contents); 6753 Py_INCREF((PyObject *)&PyDict_Type); 6754 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type); 6755 PyTuple_SET_ITEM(reduce_value, 1, constructor_args); 6756 return reduce_value; 6757 } 6758 6759 static PyMethodDef unpicklerproxy_methods[] = { 6760 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF 6761 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF 6762 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF 6763 {NULL, NULL} /* sentinel */ 6764 }; 6765 6766 static void 6767 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self) 6768 { 6769 PyObject_GC_UnTrack(self); 6770 Py_XDECREF(self->unpickler); 6771 PyObject_GC_Del((PyObject *)self); 6772 } 6773 6774 static int 6775 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self, 6776 visitproc visit, void *arg) 6777 { 6778 Py_VISIT(self->unpickler); 6779 return 0; 6780 } 6781 6782 static int 6783 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self) 6784 { 6785 Py_CLEAR(self->unpickler); 6786 return 0; 6787 } 6788 6789 static PyTypeObject UnpicklerMemoProxyType = { 6790 PyVarObject_HEAD_INIT(NULL, 0) 6791 "_pickle.UnpicklerMemoProxy", /*tp_name*/ 6792 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/ 6793 0, 6794 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */ 6795 
0, /* tp_print */ 6796 0, /* tp_getattr */ 6797 0, /* tp_setattr */ 6798 0, /* tp_compare */ 6799 0, /* tp_repr */ 6800 0, /* tp_as_number */ 6801 0, /* tp_as_sequence */ 6802 0, /* tp_as_mapping */ 6803 PyObject_HashNotImplemented, /* tp_hash */ 6804 0, /* tp_call */ 6805 0, /* tp_str */ 6806 PyObject_GenericGetAttr, /* tp_getattro */ 6807 PyObject_GenericSetAttr, /* tp_setattro */ 6808 0, /* tp_as_buffer */ 6809 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 6810 0, /* tp_doc */ 6811 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */ 6812 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */ 6813 0, /* tp_richcompare */ 6814 0, /* tp_weaklistoffset */ 6815 0, /* tp_iter */ 6816 0, /* tp_iternext */ 6817 unpicklerproxy_methods, /* tp_methods */ 6818 }; 6819 6820 static PyObject * 6821 UnpicklerMemoProxy_New(UnpicklerObject *unpickler) 6822 { 6823 UnpicklerMemoProxyObject *self; 6824 6825 self = PyObject_GC_New(UnpicklerMemoProxyObject, 6826 &UnpicklerMemoProxyType); 6827 if (self == NULL) 6828 return NULL; 6829 Py_INCREF(unpickler); 6830 self->unpickler = unpickler; 6831 PyObject_GC_Track(self); 6832 return (PyObject *)self; 6833 } 6834 6835 /*****************************************************************************/ 6836 6837 6838 static PyObject * 6839 Unpickler_get_memo(UnpicklerObject *self) 6840 { 6841 return UnpicklerMemoProxy_New(self); 6842 } 6843 6844 static int 6845 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj) 6846 { 6847 PyObject **new_memo; 6848 Py_ssize_t new_memo_size = 0; 6849 Py_ssize_t i; 6850 6851 if (obj == NULL) { 6852 PyErr_SetString(PyExc_TypeError, 6853 "attribute deletion is not supported"); 6854 return -1; 6855 } 6856 6857 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) { 6858 UnpicklerObject *unpickler = 6859 ((UnpicklerMemoProxyObject *)obj)->unpickler; 6860 6861 new_memo_size = unpickler->memo_size; 6862 new_memo = _Unpickler_NewMemo(new_memo_size); 6863 if (new_memo == NULL) 6864 return -1; 6865 6866 
for (i = 0; i < new_memo_size; i++) { 6867 Py_XINCREF(unpickler->memo[i]); 6868 new_memo[i] = unpickler->memo[i]; 6869 } 6870 } 6871 else if (PyDict_Check(obj)) { 6872 Py_ssize_t i = 0; 6873 PyObject *key, *value; 6874 6875 new_memo_size = PyDict_Size(obj); 6876 new_memo = _Unpickler_NewMemo(new_memo_size); 6877 if (new_memo == NULL) 6878 return -1; 6879 6880 while (PyDict_Next(obj, &i, &key, &value)) { 6881 Py_ssize_t idx; 6882 if (!PyLong_Check(key)) { 6883 PyErr_SetString(PyExc_TypeError, 6884 "memo key must be integers"); 6885 goto error; 6886 } 6887 idx = PyLong_AsSsize_t(key); 6888 if (idx == -1 && PyErr_Occurred()) 6889 goto error; 6890 if (idx < 0) { 6891 PyErr_SetString(PyExc_ValueError, 6892 "memo key must be positive integers."); 6893 goto error; 6894 } 6895 if (_Unpickler_MemoPut(self, idx, value) < 0) 6896 goto error; 6897 } 6898 } 6899 else { 6900 PyErr_Format(PyExc_TypeError, 6901 "'memo' attribute must be an UnpicklerMemoProxy object" 6902 "or dict, not %.200s", Py_TYPE(obj)->tp_name); 6903 return -1; 6904 } 6905 6906 _Unpickler_MemoCleanup(self); 6907 self->memo_size = new_memo_size; 6908 self->memo = new_memo; 6909 6910 return 0; 6911 6912 error: 6913 if (new_memo_size) { 6914 i = new_memo_size; 6915 while (--i >= 0) { 6916 Py_XDECREF(new_memo[i]); 6917 } 6918 PyMem_FREE(new_memo); 6919 } 6920 return -1; 6921 } 6922 6923 static PyObject * 6924 Unpickler_get_persload(UnpicklerObject *self) 6925 { 6926 if (self->pers_func == NULL) 6927 PyErr_SetString(PyExc_AttributeError, "persistent_load"); 6928 else 6929 Py_INCREF(self->pers_func); 6930 return self->pers_func; 6931 } 6932 6933 static int 6934 Unpickler_set_persload(UnpicklerObject *self, PyObject *value) 6935 { 6936 if (value == NULL) { 6937 PyErr_SetString(PyExc_TypeError, 6938 "attribute deletion is not supported"); 6939 return -1; 6940 } 6941 if (!PyCallable_Check(value)) { 6942 PyErr_SetString(PyExc_TypeError, 6943 "persistent_load must be a callable taking " 6944 "one argument"); 6945 return 
-1; 6946 } 6947 6948 Py_INCREF(value); 6949 Py_XSETREF(self->pers_func, value); 6950 6951 return 0; 6952 } 6953 6954 static PyGetSetDef Unpickler_getsets[] = { 6955 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo}, 6956 {"persistent_load", (getter)Unpickler_get_persload, 6957 (setter)Unpickler_set_persload}, 6958 {NULL} 6959 }; 6960 6961 static PyTypeObject Unpickler_Type = { 6962 PyVarObject_HEAD_INIT(NULL, 0) 6963 "_pickle.Unpickler", /*tp_name*/ 6964 sizeof(UnpicklerObject), /*tp_basicsize*/ 6965 0, /*tp_itemsize*/ 6966 (destructor)Unpickler_dealloc, /*tp_dealloc*/ 6967 0, /*tp_print*/ 6968 0, /*tp_getattr*/ 6969 0, /*tp_setattr*/ 6970 0, /*tp_reserved*/ 6971 0, /*tp_repr*/ 6972 0, /*tp_as_number*/ 6973 0, /*tp_as_sequence*/ 6974 0, /*tp_as_mapping*/ 6975 0, /*tp_hash*/ 6976 0, /*tp_call*/ 6977 0, /*tp_str*/ 6978 0, /*tp_getattro*/ 6979 0, /*tp_setattro*/ 6980 0, /*tp_as_buffer*/ 6981 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 6982 _pickle_Unpickler___init____doc__, /*tp_doc*/ 6983 (traverseproc)Unpickler_traverse, /*tp_traverse*/ 6984 (inquiry)Unpickler_clear, /*tp_clear*/ 6985 0, /*tp_richcompare*/ 6986 0, /*tp_weaklistoffset*/ 6987 0, /*tp_iter*/ 6988 0, /*tp_iternext*/ 6989 Unpickler_methods, /*tp_methods*/ 6990 0, /*tp_members*/ 6991 Unpickler_getsets, /*tp_getset*/ 6992 0, /*tp_base*/ 6993 0, /*tp_dict*/ 6994 0, /*tp_descr_get*/ 6995 0, /*tp_descr_set*/ 6996 0, /*tp_dictoffset*/ 6997 _pickle_Unpickler___init__, /*tp_init*/ 6998 PyType_GenericAlloc, /*tp_alloc*/ 6999 PyType_GenericNew, /*tp_new*/ 7000 PyObject_GC_Del, /*tp_free*/ 7001 0, /*tp_is_gc*/ 7002 }; 7003 7004 /*[clinic input] 7005 7006 _pickle.dump 7007 7008 obj: object 7009 file: object 7010 protocol: object = NULL 7011 * 7012 fix_imports: bool = True 7013 7014 Write a pickled representation of obj to the open file object file. 7015 7016 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may 7017 be more efficient. 
7018 7019 The optional *protocol* argument tells the pickler to use the given 7020 protocol supported protocols are 0, 1, 2, 3 and 4. The default 7021 protocol is 3; a backward-incompatible protocol designed for Python 3. 7022 7023 Specifying a negative protocol version selects the highest protocol 7024 version supported. The higher the protocol used, the more recent the 7025 version of Python needed to read the pickle produced. 7026 7027 The *file* argument must have a write() method that accepts a single 7028 bytes argument. It can thus be a file object opened for binary 7029 writing, an io.BytesIO instance, or any other custom object that meets 7030 this interface. 7031 7032 If *fix_imports* is True and protocol is less than 3, pickle will try 7033 to map the new Python 3 names to the old module names used in Python 7034 2, so that the pickle data stream is readable with Python 2. 7035 [clinic start generated code]*/ 7036 7037 static PyObject * 7038 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file, 7039 PyObject *protocol, int fix_imports) 7040 /*[clinic end generated code: output=a4774d5fde7d34de input=830f8a64cef6f042]*/ 7041 { 7042 PicklerObject *pickler = _Pickler_New(); 7043 7044 if (pickler == NULL) 7045 return NULL; 7046 7047 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0) 7048 goto error; 7049 7050 if (_Pickler_SetOutputStream(pickler, file) < 0) 7051 goto error; 7052 7053 if (dump(pickler, obj) < 0) 7054 goto error; 7055 7056 if (_Pickler_FlushToFile(pickler) < 0) 7057 goto error; 7058 7059 Py_DECREF(pickler); 7060 Py_RETURN_NONE; 7061 7062 error: 7063 Py_XDECREF(pickler); 7064 return NULL; 7065 } 7066 7067 /*[clinic input] 7068 7069 _pickle.dumps 7070 7071 obj: object 7072 protocol: object = NULL 7073 * 7074 fix_imports: bool = True 7075 7076 Return the pickled representation of the object as a bytes object. 
7077 7078 The optional *protocol* argument tells the pickler to use the given 7079 protocol; supported protocols are 0, 1, 2, 3 and 4. The default 7080 protocol is 3; a backward-incompatible protocol designed for Python 3. 7081 7082 Specifying a negative protocol version selects the highest protocol 7083 version supported. The higher the protocol used, the more recent the 7084 version of Python needed to read the pickle produced. 7085 7086 If *fix_imports* is True and *protocol* is less than 3, pickle will 7087 try to map the new Python 3 names to the old module names used in 7088 Python 2, so that the pickle data stream is readable with Python 2. 7089 [clinic start generated code]*/ 7090 7091 static PyObject * 7092 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol, 7093 int fix_imports) 7094 /*[clinic end generated code: output=d75d5cda456fd261 input=293dbeda181580b7]*/ 7095 { 7096 PyObject *result; 7097 PicklerObject *pickler = _Pickler_New(); 7098 7099 if (pickler == NULL) 7100 return NULL; 7101 7102 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0) 7103 goto error; 7104 7105 if (dump(pickler, obj) < 0) 7106 goto error; 7107 7108 result = _Pickler_GetString(pickler); 7109 Py_DECREF(pickler); 7110 return result; 7111 7112 error: 7113 Py_XDECREF(pickler); 7114 return NULL; 7115 } 7116 7117 /*[clinic input] 7118 7119 _pickle.load 7120 7121 file: object 7122 * 7123 fix_imports: bool = True 7124 encoding: str = 'ASCII' 7125 errors: str = 'strict' 7126 7127 Read and return an object from the pickle data stored in a file. 7128 7129 This is equivalent to ``Unpickler(file).load()``, but may be more 7130 efficient. 7131 7132 The protocol version of the pickle is detected automatically, so no 7133 protocol argument is needed. Bytes past the pickled object's 7134 representation are ignored. 
7135 7136 The argument *file* must have two methods, a read() method that takes 7137 an integer argument, and a readline() method that requires no 7138 arguments. Both methods should return bytes. Thus *file* can be a 7139 binary file object opened for reading, an io.BytesIO object, or any 7140 other custom object that meets this interface. 7141 7142 Optional keyword arguments are *fix_imports*, *encoding* and *errors*, 7143 which are used to control compatibility support for pickle stream 7144 generated by Python 2. If *fix_imports* is True, pickle will try to 7145 map the old Python 2 names to the new names used in Python 3. The 7146 *encoding* and *errors* tell pickle how to decode 8-bit string 7147 instances pickled by Python 2; these default to 'ASCII' and 'strict', 7148 respectively. The *encoding* can be 'bytes' to read these 8-bit 7149 string instances as bytes objects. 7150 [clinic start generated code]*/ 7151 7152 static PyObject * 7153 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports, 7154 const char *encoding, const char *errors) 7155 /*[clinic end generated code: output=69e298160285199e input=01b44dd3fc07afa7]*/ 7156 { 7157 PyObject *result; 7158 UnpicklerObject *unpickler = _Unpickler_New(); 7159 7160 if (unpickler == NULL) 7161 return NULL; 7162 7163 if (_Unpickler_SetInputStream(unpickler, file) < 0) 7164 goto error; 7165 7166 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0) 7167 goto error; 7168 7169 unpickler->fix_imports = fix_imports; 7170 7171 result = load(unpickler); 7172 Py_DECREF(unpickler); 7173 return result; 7174 7175 error: 7176 Py_XDECREF(unpickler); 7177 return NULL; 7178 } 7179 7180 /*[clinic input] 7181 7182 _pickle.loads 7183 7184 data: object 7185 * 7186 fix_imports: bool = True 7187 encoding: str = 'ASCII' 7188 errors: str = 'strict' 7189 7190 Read and return an object from the given pickle data. 
7191 7192 The protocol version of the pickle is detected automatically, so no 7193 protocol argument is needed. Bytes past the pickled object's 7194 representation are ignored. 7195 7196 Optional keyword arguments are *fix_imports*, *encoding* and *errors*, 7197 which are used to control compatibility support for pickle stream 7198 generated by Python 2. If *fix_imports* is True, pickle will try to 7199 map the old Python 2 names to the new names used in Python 3. The 7200 *encoding* and *errors* tell pickle how to decode 8-bit string 7201 instances pickled by Python 2; these default to 'ASCII' and 'strict', 7202 respectively. The *encoding* can be 'bytes' to read these 8-bit 7203 string instances as bytes objects. 7204 [clinic start generated code]*/ 7205 7206 static PyObject * 7207 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports, 7208 const char *encoding, const char *errors) 7209 /*[clinic end generated code: output=1e7cb2343f2c440f input=70605948a719feb9]*/ 7210 { 7211 PyObject *result; 7212 UnpicklerObject *unpickler = _Unpickler_New(); 7213 7214 if (unpickler == NULL) 7215 return NULL; 7216 7217 if (_Unpickler_SetStringInput(unpickler, data) < 0) 7218 goto error; 7219 7220 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0) 7221 goto error; 7222 7223 unpickler->fix_imports = fix_imports; 7224 7225 result = load(unpickler); 7226 Py_DECREF(unpickler); 7227 return result; 7228 7229 error: 7230 Py_XDECREF(unpickler); 7231 return NULL; 7232 } 7233 7234 static struct PyMethodDef pickle_methods[] = { 7235 _PICKLE_DUMP_METHODDEF 7236 _PICKLE_DUMPS_METHODDEF 7237 _PICKLE_LOAD_METHODDEF 7238 _PICKLE_LOADS_METHODDEF 7239 {NULL, NULL} /* sentinel */ 7240 }; 7241 7242 static int 7243 pickle_clear(PyObject *m) 7244 { 7245 _Pickle_ClearState(_Pickle_GetState(m)); 7246 return 0; 7247 } 7248 7249 static void 7250 pickle_free(PyObject *m) 7251 { 7252 _Pickle_ClearState(_Pickle_GetState(m)); 7253 } 7254 7255 static int 7256 
pickle_traverse(PyObject *m, visitproc visit, void *arg) 7257 { 7258 PickleState *st = _Pickle_GetState(m); 7259 Py_VISIT(st->PickleError); 7260 Py_VISIT(st->PicklingError); 7261 Py_VISIT(st->UnpicklingError); 7262 Py_VISIT(st->dispatch_table); 7263 Py_VISIT(st->extension_registry); 7264 Py_VISIT(st->extension_cache); 7265 Py_VISIT(st->inverted_registry); 7266 Py_VISIT(st->name_mapping_2to3); 7267 Py_VISIT(st->import_mapping_2to3); 7268 Py_VISIT(st->name_mapping_3to2); 7269 Py_VISIT(st->import_mapping_3to2); 7270 Py_VISIT(st->codecs_encode); 7271 Py_VISIT(st->getattr); 7272 return 0; 7273 } 7274 7275 static struct PyModuleDef _picklemodule = { 7276 PyModuleDef_HEAD_INIT, 7277 "_pickle", /* m_name */ 7278 pickle_module_doc, /* m_doc */ 7279 sizeof(PickleState), /* m_size */ 7280 pickle_methods, /* m_methods */ 7281 NULL, /* m_reload */ 7282 pickle_traverse, /* m_traverse */ 7283 pickle_clear, /* m_clear */ 7284 (freefunc)pickle_free /* m_free */ 7285 }; 7286 7287 PyMODINIT_FUNC 7288 PyInit__pickle(void) 7289 { 7290 PyObject *m; 7291 PickleState *st; 7292 7293 m = PyState_FindModule(&_picklemodule); 7294 if (m) { 7295 Py_INCREF(m); 7296 return m; 7297 } 7298 7299 if (PyType_Ready(&Unpickler_Type) < 0) 7300 return NULL; 7301 if (PyType_Ready(&Pickler_Type) < 0) 7302 return NULL; 7303 if (PyType_Ready(&Pdata_Type) < 0) 7304 return NULL; 7305 if (PyType_Ready(&PicklerMemoProxyType) < 0) 7306 return NULL; 7307 if (PyType_Ready(&UnpicklerMemoProxyType) < 0) 7308 return NULL; 7309 7310 /* Create the module and add the functions. */ 7311 m = PyModule_Create(&_picklemodule); 7312 if (m == NULL) 7313 return NULL; 7314 7315 Py_INCREF(&Pickler_Type); 7316 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0) 7317 return NULL; 7318 Py_INCREF(&Unpickler_Type); 7319 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0) 7320 return NULL; 7321 7322 st = _Pickle_GetState(m); 7323 7324 /* Initialize the exceptions. 
*/ 7325 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL); 7326 if (st->PickleError == NULL) 7327 return NULL; 7328 st->PicklingError = \ 7329 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL); 7330 if (st->PicklingError == NULL) 7331 return NULL; 7332 st->UnpicklingError = \ 7333 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL); 7334 if (st->UnpicklingError == NULL) 7335 return NULL; 7336 7337 Py_INCREF(st->PickleError); 7338 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0) 7339 return NULL; 7340 Py_INCREF(st->PicklingError); 7341 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0) 7342 return NULL; 7343 Py_INCREF(st->UnpicklingError); 7344 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0) 7345 return NULL; 7346 7347 if (_Pickle_InitState(st) < 0) 7348 return NULL; 7349 7350 return m; 7351 } 7352