
/* Write Python objects to files and read them back.
   This is primarily intended for writing and reading compiled Python code,
   even though dicts, lists, sets and frozensets, not commonly seen in
   code objects, are supported.
   Version 3 of this protocol properly supports circular links
   and sharing. */

#define PY_SSIZE_T_CLEAN

#include "Python.h"
#include "longintrepr.h"
#include "code.h"
#include "marshal.h"
#include "../Modules/hashtable.h"

/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 * On Windows debug builds, reduce this value.
 */
#if defined(MS_WINDOWS) && defined(_DEBUG)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif

/* One-byte type codes.  The writer emits one of these (possibly OR-ed with
 * FLAG_REF) in front of every object; the reader switches on it.
 * TYPE_NULL stands for a NULL object pointer and is also what terminates a
 * marshalled dict.
 */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
49 #define TYPE_SET '<' 50 #define TYPE_FROZENSET '>' 51 #define FLAG_REF '\x80' /* with a type, add obj to index */ 52 53 #define TYPE_ASCII 'a' 54 #define TYPE_ASCII_INTERNED 'A' 55 #define TYPE_SMALL_TUPLE ')' 56 #define TYPE_SHORT_ASCII 'z' 57 #define TYPE_SHORT_ASCII_INTERNED 'Z' 58 59 #define WFERR_OK 0 60 #define WFERR_UNMARSHALLABLE 1 61 #define WFERR_NESTEDTOODEEP 2 62 #define WFERR_NOMEMORY 3 63 64 typedef struct { 65 FILE *fp; 66 int error; /* see WFERR_* values */ 67 int depth; 68 PyObject *str; 69 char *ptr; 70 char *end; 71 char *buf; 72 _Py_hashtable_t *hashtable; 73 int version; 74 } WFILE; 75 76 #define w_byte(c, p) do { \ 77 if ((p)->ptr != (p)->end || w_reserve((p), 1)) \ 78 *(p)->ptr++ = (c); \ 79 } while(0) 80 81 static void 82 w_flush(WFILE *p) 83 { 84 assert(p->fp != NULL); 85 fwrite(p->buf, 1, p->ptr - p->buf, p->fp); 86 p->ptr = p->buf; 87 } 88 89 static int 90 w_reserve(WFILE *p, Py_ssize_t needed) 91 { 92 Py_ssize_t pos, size, delta; 93 if (p->ptr == NULL) 94 return 0; /* An error already occurred */ 95 if (p->fp != NULL) { 96 w_flush(p); 97 return needed <= p->end - p->ptr; 98 } 99 assert(p->str != NULL); 100 pos = p->ptr - p->buf; 101 size = PyBytes_Size(p->str); 102 if (size > 16*1024*1024) 103 delta = (size >> 3); /* 12.5% overallocation */ 104 else 105 delta = size + 1024; 106 delta = Py_MAX(delta, needed); 107 if (delta > PY_SSIZE_T_MAX - size) { 108 p->error = WFERR_NOMEMORY; 109 return 0; 110 } 111 size += delta; 112 if (_PyBytes_Resize(&p->str, size) != 0) { 113 p->ptr = p->buf = p->end = NULL; 114 return 0; 115 } 116 else { 117 p->buf = PyBytes_AS_STRING(p->str); 118 p->ptr = p->buf + pos; 119 p->end = p->buf + size; 120 return 1; 121 } 122 } 123 124 static void 125 w_string(const char *s, Py_ssize_t n, WFILE *p) 126 { 127 Py_ssize_t m; 128 if (!n || p->ptr == NULL) 129 return; 130 m = p->end - p->ptr; 131 if (p->fp != NULL) { 132 if (n <= m) { 133 memcpy(p->ptr, s, n); 134 p->ptr += n; 135 } 136 else { 137 w_flush(p); 138 
fwrite(s, 1, n, p->fp); 139 } 140 } 141 else { 142 if (n <= m || w_reserve(p, n - m)) { 143 memcpy(p->ptr, s, n); 144 p->ptr += n; 145 } 146 } 147 } 148 149 static void 150 w_short(int x, WFILE *p) 151 { 152 w_byte((char)( x & 0xff), p); 153 w_byte((char)((x>> 8) & 0xff), p); 154 } 155 156 static void 157 w_long(long x, WFILE *p) 158 { 159 w_byte((char)( x & 0xff), p); 160 w_byte((char)((x>> 8) & 0xff), p); 161 w_byte((char)((x>>16) & 0xff), p); 162 w_byte((char)((x>>24) & 0xff), p); 163 } 164 165 #define SIZE32_MAX 0x7FFFFFFF 166 167 #if SIZEOF_SIZE_T > 4 168 # define W_SIZE(n, p) do { \ 169 if ((n) > SIZE32_MAX) { \ 170 (p)->depth--; \ 171 (p)->error = WFERR_UNMARSHALLABLE; \ 172 return; \ 173 } \ 174 w_long((long)(n), p); \ 175 } while(0) 176 #else 177 # define W_SIZE w_long 178 #endif 179 180 static void 181 w_pstring(const char *s, Py_ssize_t n, WFILE *p) 182 { 183 W_SIZE(n, p); 184 w_string(s, n, p); 185 } 186 187 static void 188 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p) 189 { 190 w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p); 191 w_string(s, n, p); 192 } 193 194 /* We assume that Python ints are stored internally in base some power of 195 2**15; for the sake of portability we'll always read and write them in base 196 exactly 2**15. 
*/ 197 198 #define PyLong_MARSHAL_SHIFT 15 199 #define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT) 200 #define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1) 201 #if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0 202 #error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT" 203 #endif 204 #define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT) 205 206 #define W_TYPE(t, p) do { \ 207 w_byte((t) | flag, (p)); \ 208 } while(0) 209 210 static void 211 w_PyLong(const PyLongObject *ob, char flag, WFILE *p) 212 { 213 Py_ssize_t i, j, n, l; 214 digit d; 215 216 W_TYPE(TYPE_LONG, p); 217 if (Py_SIZE(ob) == 0) { 218 w_long((long)0, p); 219 return; 220 } 221 222 /* set l to number of base PyLong_MARSHAL_BASE digits */ 223 n = Py_ABS(Py_SIZE(ob)); 224 l = (n-1) * PyLong_MARSHAL_RATIO; 225 d = ob->ob_digit[n-1]; 226 assert(d != 0); /* a PyLong is always normalized */ 227 do { 228 d >>= PyLong_MARSHAL_SHIFT; 229 l++; 230 } while (d != 0); 231 if (l > SIZE32_MAX) { 232 p->depth--; 233 p->error = WFERR_UNMARSHALLABLE; 234 return; 235 } 236 w_long((long)(Py_SIZE(ob) > 0 ? 
l : -l), p); 237 238 for (i=0; i < n-1; i++) { 239 d = ob->ob_digit[i]; 240 for (j=0; j < PyLong_MARSHAL_RATIO; j++) { 241 w_short(d & PyLong_MARSHAL_MASK, p); 242 d >>= PyLong_MARSHAL_SHIFT; 243 } 244 assert (d == 0); 245 } 246 d = ob->ob_digit[n-1]; 247 do { 248 w_short(d & PyLong_MARSHAL_MASK, p); 249 d >>= PyLong_MARSHAL_SHIFT; 250 } while (d != 0); 251 } 252 253 static int 254 w_ref(PyObject *v, char *flag, WFILE *p) 255 { 256 _Py_hashtable_entry_t *entry; 257 int w; 258 259 if (p->version < 3 || p->hashtable == NULL) 260 return 0; /* not writing object references */ 261 262 /* if it has only one reference, it definitely isn't shared */ 263 if (Py_REFCNT(v) == 1) 264 return 0; 265 266 entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v); 267 if (entry != NULL) { 268 /* write the reference index to the stream */ 269 _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w); 270 /* we don't store "long" indices in the dict */ 271 assert(0 <= w && w <= 0x7fffffff); 272 w_byte(TYPE_REF, p); 273 w_long(w, p); 274 return 1; 275 } else { 276 size_t s = p->hashtable->entries; 277 /* we don't support long indices */ 278 if (s >= 0x7fffffff) { 279 PyErr_SetString(PyExc_ValueError, "too many objects"); 280 goto err; 281 } 282 w = (int)s; 283 Py_INCREF(v); 284 if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) { 285 Py_DECREF(v); 286 goto err; 287 } 288 *flag |= FLAG_REF; 289 return 0; 290 } 291 err: 292 p->error = WFERR_UNMARSHALLABLE; 293 return 1; 294 } 295 296 static void 297 w_complex_object(PyObject *v, char flag, WFILE *p); 298 299 static void 300 w_object(PyObject *v, WFILE *p) 301 { 302 char flag = '\0'; 303 304 p->depth++; 305 306 if (p->depth > MAX_MARSHAL_STACK_DEPTH) { 307 p->error = WFERR_NESTEDTOODEEP; 308 } 309 else if (v == NULL) { 310 w_byte(TYPE_NULL, p); 311 } 312 else if (v == Py_None) { 313 w_byte(TYPE_NONE, p); 314 } 315 else if (v == PyExc_StopIteration) { 316 w_byte(TYPE_STOPITER, p); 317 } 318 else if (v == Py_Ellipsis) { 319 w_byte(TYPE_ELLIPSIS, p); 320 } 
321 else if (v == Py_False) { 322 w_byte(TYPE_FALSE, p); 323 } 324 else if (v == Py_True) { 325 w_byte(TYPE_TRUE, p); 326 } 327 else if (!w_ref(v, &flag, p)) 328 w_complex_object(v, flag, p); 329 330 p->depth--; 331 } 332 333 static void 334 w_complex_object(PyObject *v, char flag, WFILE *p) 335 { 336 Py_ssize_t i, n; 337 338 if (PyLong_CheckExact(v)) { 339 long x = PyLong_AsLong(v); 340 if ((x == -1) && PyErr_Occurred()) { 341 PyLongObject *ob = (PyLongObject *)v; 342 PyErr_Clear(); 343 w_PyLong(ob, flag, p); 344 } 345 else { 346 #if SIZEOF_LONG > 4 347 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31); 348 if (y && y != -1) { 349 /* Too large for TYPE_INT */ 350 w_PyLong((PyLongObject*)v, flag, p); 351 } 352 else 353 #endif 354 { 355 W_TYPE(TYPE_INT, p); 356 w_long(x, p); 357 } 358 } 359 } 360 else if (PyFloat_CheckExact(v)) { 361 if (p->version > 1) { 362 unsigned char buf[8]; 363 if (_PyFloat_Pack8(PyFloat_AsDouble(v), 364 buf, 1) < 0) { 365 p->error = WFERR_UNMARSHALLABLE; 366 return; 367 } 368 W_TYPE(TYPE_BINARY_FLOAT, p); 369 w_string((char*)buf, 8, p); 370 } 371 else { 372 char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), 373 'g', 17, 0, NULL); 374 if (!buf) { 375 p->error = WFERR_NOMEMORY; 376 return; 377 } 378 n = strlen(buf); 379 W_TYPE(TYPE_FLOAT, p); 380 w_byte((int)n, p); 381 w_string(buf, n, p); 382 PyMem_Free(buf); 383 } 384 } 385 else if (PyComplex_CheckExact(v)) { 386 if (p->version > 1) { 387 unsigned char buf[8]; 388 if (_PyFloat_Pack8(PyComplex_RealAsDouble(v), 389 buf, 1) < 0) { 390 p->error = WFERR_UNMARSHALLABLE; 391 return; 392 } 393 W_TYPE(TYPE_BINARY_COMPLEX, p); 394 w_string((char*)buf, 8, p); 395 if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v), 396 buf, 1) < 0) { 397 p->error = WFERR_UNMARSHALLABLE; 398 return; 399 } 400 w_string((char*)buf, 8, p); 401 } 402 else { 403 char *buf; 404 W_TYPE(TYPE_COMPLEX, p); 405 buf = PyOS_double_to_string(PyComplex_RealAsDouble(v), 406 'g', 17, 0, NULL); 407 if (!buf) { 408 p->error = WFERR_NOMEMORY; 
409 return; 410 } 411 n = strlen(buf); 412 w_byte((int)n, p); 413 w_string(buf, n, p); 414 PyMem_Free(buf); 415 buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v), 416 'g', 17, 0, NULL); 417 if (!buf) { 418 p->error = WFERR_NOMEMORY; 419 return; 420 } 421 n = strlen(buf); 422 w_byte((int)n, p); 423 w_string(buf, n, p); 424 PyMem_Free(buf); 425 } 426 } 427 else if (PyBytes_CheckExact(v)) { 428 W_TYPE(TYPE_STRING, p); 429 w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p); 430 } 431 else if (PyUnicode_CheckExact(v)) { 432 if (p->version >= 4 && PyUnicode_IS_ASCII(v)) { 433 int is_short = PyUnicode_GET_LENGTH(v) < 256; 434 if (is_short) { 435 if (PyUnicode_CHECK_INTERNED(v)) 436 W_TYPE(TYPE_SHORT_ASCII_INTERNED, p); 437 else 438 W_TYPE(TYPE_SHORT_ASCII, p); 439 w_short_pstring((char *) PyUnicode_1BYTE_DATA(v), 440 PyUnicode_GET_LENGTH(v), p); 441 } 442 else { 443 if (PyUnicode_CHECK_INTERNED(v)) 444 W_TYPE(TYPE_ASCII_INTERNED, p); 445 else 446 W_TYPE(TYPE_ASCII, p); 447 w_pstring((char *) PyUnicode_1BYTE_DATA(v), 448 PyUnicode_GET_LENGTH(v), p); 449 } 450 } 451 else { 452 PyObject *utf8; 453 utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass"); 454 if (utf8 == NULL) { 455 p->depth--; 456 p->error = WFERR_UNMARSHALLABLE; 457 return; 458 } 459 if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) 460 W_TYPE(TYPE_INTERNED, p); 461 else 462 W_TYPE(TYPE_UNICODE, p); 463 w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p); 464 Py_DECREF(utf8); 465 } 466 } 467 else if (PyTuple_CheckExact(v)) { 468 n = PyTuple_Size(v); 469 if (p->version >= 4 && n < 256) { 470 W_TYPE(TYPE_SMALL_TUPLE, p); 471 w_byte((unsigned char)n, p); 472 } 473 else { 474 W_TYPE(TYPE_TUPLE, p); 475 W_SIZE(n, p); 476 } 477 for (i = 0; i < n; i++) { 478 w_object(PyTuple_GET_ITEM(v, i), p); 479 } 480 } 481 else if (PyList_CheckExact(v)) { 482 W_TYPE(TYPE_LIST, p); 483 n = PyList_GET_SIZE(v); 484 W_SIZE(n, p); 485 for (i = 0; i < n; i++) { 486 w_object(PyList_GET_ITEM(v, i), p); 487 } 
488 } 489 else if (PyDict_CheckExact(v)) { 490 Py_ssize_t pos; 491 PyObject *key, *value; 492 W_TYPE(TYPE_DICT, p); 493 /* This one is NULL object terminated! */ 494 pos = 0; 495 while (PyDict_Next(v, &pos, &key, &value)) { 496 w_object(key, p); 497 w_object(value, p); 498 } 499 w_object((PyObject *)NULL, p); 500 } 501 else if (PyAnySet_CheckExact(v)) { 502 PyObject *value, *it; 503 504 if (PyObject_TypeCheck(v, &PySet_Type)) 505 W_TYPE(TYPE_SET, p); 506 else 507 W_TYPE(TYPE_FROZENSET, p); 508 n = PyObject_Size(v); 509 if (n == -1) { 510 p->depth--; 511 p->error = WFERR_UNMARSHALLABLE; 512 return; 513 } 514 W_SIZE(n, p); 515 it = PyObject_GetIter(v); 516 if (it == NULL) { 517 p->depth--; 518 p->error = WFERR_UNMARSHALLABLE; 519 return; 520 } 521 while ((value = PyIter_Next(it)) != NULL) { 522 w_object(value, p); 523 Py_DECREF(value); 524 } 525 Py_DECREF(it); 526 if (PyErr_Occurred()) { 527 p->depth--; 528 p->error = WFERR_UNMARSHALLABLE; 529 return; 530 } 531 } 532 else if (PyCode_Check(v)) { 533 PyCodeObject *co = (PyCodeObject *)v; 534 W_TYPE(TYPE_CODE, p); 535 w_long(co->co_argcount, p); 536 w_long(co->co_kwonlyargcount, p); 537 w_long(co->co_nlocals, p); 538 w_long(co->co_stacksize, p); 539 w_long(co->co_flags, p); 540 w_object(co->co_code, p); 541 w_object(co->co_consts, p); 542 w_object(co->co_names, p); 543 w_object(co->co_varnames, p); 544 w_object(co->co_freevars, p); 545 w_object(co->co_cellvars, p); 546 w_object(co->co_filename, p); 547 w_object(co->co_name, p); 548 w_long(co->co_firstlineno, p); 549 w_object(co->co_lnotab, p); 550 } 551 else if (PyObject_CheckBuffer(v)) { 552 /* Write unknown bytes-like objects as a byte string */ 553 Py_buffer view; 554 if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) { 555 w_byte(TYPE_UNKNOWN, p); 556 p->depth--; 557 p->error = WFERR_UNMARSHALLABLE; 558 return; 559 } 560 W_TYPE(TYPE_STRING, p); 561 w_pstring(view.buf, view.len, p); 562 PyBuffer_Release(&view); 563 } 564 else { 565 W_TYPE(TYPE_UNKNOWN, p); 566 
p->error = WFERR_UNMARSHALLABLE; 567 } 568 } 569 570 static int 571 w_init_refs(WFILE *wf, int version) 572 { 573 if (version >= 3) { 574 wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int), 575 _Py_hashtable_hash_ptr, 576 _Py_hashtable_compare_direct); 577 if (wf->hashtable == NULL) { 578 PyErr_NoMemory(); 579 return -1; 580 } 581 } 582 return 0; 583 } 584 585 static int 586 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry, 587 void *Py_UNUSED(data)) 588 { 589 PyObject *entry_key; 590 591 _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key); 592 Py_XDECREF(entry_key); 593 return 0; 594 } 595 596 static void 597 w_clear_refs(WFILE *wf) 598 { 599 if (wf->hashtable != NULL) { 600 _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL); 601 _Py_hashtable_destroy(wf->hashtable); 602 } 603 } 604 605 /* version currently has no effect for writing ints. */ 606 void 607 PyMarshal_WriteLongToFile(long x, FILE *fp, int version) 608 { 609 char buf[4]; 610 WFILE wf; 611 memset(&wf, 0, sizeof(wf)); 612 wf.fp = fp; 613 wf.ptr = wf.buf = buf; 614 wf.end = wf.ptr + sizeof(buf); 615 wf.error = WFERR_OK; 616 wf.version = version; 617 w_long(x, &wf); 618 w_flush(&wf); 619 } 620 621 void 622 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) 623 { 624 char buf[BUFSIZ]; 625 WFILE wf; 626 memset(&wf, 0, sizeof(wf)); 627 wf.fp = fp; 628 wf.ptr = wf.buf = buf; 629 wf.end = wf.ptr + sizeof(buf); 630 wf.error = WFERR_OK; 631 wf.version = version; 632 if (w_init_refs(&wf, version)) 633 return; /* caller mush check PyErr_Occurred() */ 634 w_object(x, &wf); 635 w_clear_refs(&wf); 636 w_flush(&wf); 637 } 638 639 typedef struct { 640 FILE *fp; 641 int depth; 642 PyObject *readable; /* Stream-like object being read from */ 643 PyObject *current_filename; 644 char *ptr; 645 char *end; 646 char *buf; 647 Py_ssize_t buf_size; 648 PyObject *refs; /* a list */ 649 } RFILE; 650 651 static const char * 652 r_string(Py_ssize_t n, RFILE *p) 653 { 654 Py_ssize_t 
read = -1; 655 656 if (p->ptr != NULL) { 657 /* Fast path for loads() */ 658 char *res = p->ptr; 659 Py_ssize_t left = p->end - p->ptr; 660 if (left < n) { 661 PyErr_SetString(PyExc_EOFError, 662 "marshal data too short"); 663 return NULL; 664 } 665 p->ptr += n; 666 return res; 667 } 668 if (p->buf == NULL) { 669 p->buf = PyMem_MALLOC(n); 670 if (p->buf == NULL) { 671 PyErr_NoMemory(); 672 return NULL; 673 } 674 p->buf_size = n; 675 } 676 else if (p->buf_size < n) { 677 p->buf = PyMem_REALLOC(p->buf, n); 678 if (p->buf == NULL) { 679 PyErr_NoMemory(); 680 return NULL; 681 } 682 p->buf_size = n; 683 } 684 685 if (!p->readable) { 686 assert(p->fp != NULL); 687 read = fread(p->buf, 1, n, p->fp); 688 } 689 else { 690 _Py_IDENTIFIER(readinto); 691 PyObject *res, *mview; 692 Py_buffer buf; 693 694 if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1) 695 return NULL; 696 mview = PyMemoryView_FromBuffer(&buf); 697 if (mview == NULL) 698 return NULL; 699 700 res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview); 701 if (res != NULL) { 702 read = PyNumber_AsSsize_t(res, PyExc_ValueError); 703 Py_DECREF(res); 704 } 705 } 706 if (read != n) { 707 if (!PyErr_Occurred()) { 708 if (read > n) 709 PyErr_Format(PyExc_ValueError, 710 "read() returned too much data: " 711 "%zd bytes requested, %zd returned", 712 n, read); 713 else 714 PyErr_SetString(PyExc_EOFError, 715 "EOF read where not expected"); 716 } 717 return NULL; 718 } 719 return p->buf; 720 } 721 722 static int 723 r_byte(RFILE *p) 724 { 725 int c = EOF; 726 727 if (p->ptr != NULL) { 728 if (p->ptr < p->end) 729 c = (unsigned char) *p->ptr++; 730 return c; 731 } 732 if (!p->readable) { 733 assert(p->fp); 734 c = getc(p->fp); 735 } 736 else { 737 const char *ptr = r_string(1, p); 738 if (ptr != NULL) 739 c = *(unsigned char *) ptr; 740 } 741 return c; 742 } 743 744 static int 745 r_short(RFILE *p) 746 { 747 short x = -1; 748 const unsigned char *buffer; 749 750 buffer = (const unsigned char 
*) r_string(2, p); 751 if (buffer != NULL) { 752 x = buffer[0]; 753 x |= buffer[1] << 8; 754 /* Sign-extension, in case short greater than 16 bits */ 755 x |= -(x & 0x8000); 756 } 757 return x; 758 } 759 760 static long 761 r_long(RFILE *p) 762 { 763 long x = -1; 764 const unsigned char *buffer; 765 766 buffer = (const unsigned char *) r_string(4, p); 767 if (buffer != NULL) { 768 x = buffer[0]; 769 x |= (long)buffer[1] << 8; 770 x |= (long)buffer[2] << 16; 771 x |= (long)buffer[3] << 24; 772 #if SIZEOF_LONG > 4 773 /* Sign extension for 64-bit machines */ 774 x |= -(x & 0x80000000L); 775 #endif 776 } 777 return x; 778 } 779 780 static PyObject * 781 r_PyLong(RFILE *p) 782 { 783 PyLongObject *ob; 784 long n, size, i; 785 int j, md, shorts_in_top_digit; 786 digit d; 787 788 n = r_long(p); 789 if (PyErr_Occurred()) 790 return NULL; 791 if (n == 0) 792 return (PyObject *)_PyLong_New(0); 793 if (n < -SIZE32_MAX || n > SIZE32_MAX) { 794 PyErr_SetString(PyExc_ValueError, 795 "bad marshal data (long size out of range)"); 796 return NULL; 797 } 798 799 size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO; 800 shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO; 801 ob = _PyLong_New(size); 802 if (ob == NULL) 803 return NULL; 804 805 Py_SIZE(ob) = n > 0 ? 
size : -size; 806 807 for (i = 0; i < size-1; i++) { 808 d = 0; 809 for (j=0; j < PyLong_MARSHAL_RATIO; j++) { 810 md = r_short(p); 811 if (PyErr_Occurred()) { 812 Py_DECREF(ob); 813 return NULL; 814 } 815 if (md < 0 || md > PyLong_MARSHAL_BASE) 816 goto bad_digit; 817 d += (digit)md << j*PyLong_MARSHAL_SHIFT; 818 } 819 ob->ob_digit[i] = d; 820 } 821 822 d = 0; 823 for (j=0; j < shorts_in_top_digit; j++) { 824 md = r_short(p); 825 if (PyErr_Occurred()) { 826 Py_DECREF(ob); 827 return NULL; 828 } 829 if (md < 0 || md > PyLong_MARSHAL_BASE) 830 goto bad_digit; 831 /* topmost marshal digit should be nonzero */ 832 if (md == 0 && j == shorts_in_top_digit - 1) { 833 Py_DECREF(ob); 834 PyErr_SetString(PyExc_ValueError, 835 "bad marshal data (unnormalized long data)"); 836 return NULL; 837 } 838 d += (digit)md << j*PyLong_MARSHAL_SHIFT; 839 } 840 if (PyErr_Occurred()) { 841 Py_DECREF(ob); 842 return NULL; 843 } 844 /* top digit should be nonzero, else the resulting PyLong won't be 845 normalized */ 846 ob->ob_digit[size-1] = d; 847 return (PyObject *)ob; 848 bad_digit: 849 Py_DECREF(ob); 850 PyErr_SetString(PyExc_ValueError, 851 "bad marshal data (digit out of range in long)"); 852 return NULL; 853 } 854 855 /* allocate the reflist index for a new object. Return -1 on failure */ 856 static Py_ssize_t 857 r_ref_reserve(int flag, RFILE *p) 858 { 859 if (flag) { /* currently only FLAG_REF is defined */ 860 Py_ssize_t idx = PyList_GET_SIZE(p->refs); 861 if (idx >= 0x7ffffffe) { 862 PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)"); 863 return -1; 864 } 865 if (PyList_Append(p->refs, Py_None) < 0) 866 return -1; 867 return idx; 868 } else 869 return 0; 870 } 871 872 /* insert the new object 'o' to the reflist at previously 873 * allocated index 'idx'. 874 * 'o' can be NULL, in which case nothing is done. 875 * if 'o' was non-NULL, and the function succeeds, 'o' is returned. 
876 * if 'o' was non-NULL, and the function fails, 'o' is released and 877 * NULL returned. This simplifies error checking at the call site since 878 * a single test for NULL for the function result is enough. 879 */ 880 static PyObject * 881 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p) 882 { 883 if (o != NULL && flag) { /* currently only FLAG_REF is defined */ 884 PyObject *tmp = PyList_GET_ITEM(p->refs, idx); 885 Py_INCREF(o); 886 PyList_SET_ITEM(p->refs, idx, o); 887 Py_DECREF(tmp); 888 } 889 return o; 890 } 891 892 /* combination of both above, used when an object can be 893 * created whenever it is seen in the file, as opposed to 894 * after having loaded its sub-objects. 895 */ 896 static PyObject * 897 r_ref(PyObject *o, int flag, RFILE *p) 898 { 899 assert(flag & FLAG_REF); 900 if (o == NULL) 901 return NULL; 902 if (PyList_Append(p->refs, o) < 0) { 903 Py_DECREF(o); /* release the new object */ 904 return NULL; 905 } 906 return o; 907 } 908 909 static PyObject * 910 r_object(RFILE *p) 911 { 912 /* NULL is a valid return value, it does not necessarily means that 913 an exception is set. 
*/ 914 PyObject *v, *v2; 915 Py_ssize_t idx = 0; 916 long i, n; 917 int type, code = r_byte(p); 918 int flag, is_interned = 0; 919 PyObject *retval = NULL; 920 921 if (code == EOF) { 922 PyErr_SetString(PyExc_EOFError, 923 "EOF read where object expected"); 924 return NULL; 925 } 926 927 p->depth++; 928 929 if (p->depth > MAX_MARSHAL_STACK_DEPTH) { 930 p->depth--; 931 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded"); 932 return NULL; 933 } 934 935 flag = code & FLAG_REF; 936 type = code & ~FLAG_REF; 937 938 #define R_REF(O) do{\ 939 if (flag) \ 940 O = r_ref(O, flag, p);\ 941 } while (0) 942 943 switch (type) { 944 945 case TYPE_NULL: 946 break; 947 948 case TYPE_NONE: 949 Py_INCREF(Py_None); 950 retval = Py_None; 951 break; 952 953 case TYPE_STOPITER: 954 Py_INCREF(PyExc_StopIteration); 955 retval = PyExc_StopIteration; 956 break; 957 958 case TYPE_ELLIPSIS: 959 Py_INCREF(Py_Ellipsis); 960 retval = Py_Ellipsis; 961 break; 962 963 case TYPE_FALSE: 964 Py_INCREF(Py_False); 965 retval = Py_False; 966 break; 967 968 case TYPE_TRUE: 969 Py_INCREF(Py_True); 970 retval = Py_True; 971 break; 972 973 case TYPE_INT: 974 n = r_long(p); 975 retval = PyErr_Occurred() ? 
NULL : PyLong_FromLong(n); 976 R_REF(retval); 977 break; 978 979 case TYPE_LONG: 980 retval = r_PyLong(p); 981 R_REF(retval); 982 break; 983 984 case TYPE_FLOAT: 985 { 986 char buf[256]; 987 const char *ptr; 988 double dx; 989 n = r_byte(p); 990 if (n == EOF) { 991 PyErr_SetString(PyExc_EOFError, 992 "EOF read where object expected"); 993 break; 994 } 995 ptr = r_string(n, p); 996 if (ptr == NULL) 997 break; 998 memcpy(buf, ptr, n); 999 buf[n] = '\0'; 1000 dx = PyOS_string_to_double(buf, NULL, NULL); 1001 if (dx == -1.0 && PyErr_Occurred()) 1002 break; 1003 retval = PyFloat_FromDouble(dx); 1004 R_REF(retval); 1005 break; 1006 } 1007 1008 case TYPE_BINARY_FLOAT: 1009 { 1010 const unsigned char *buf; 1011 double x; 1012 buf = (const unsigned char *) r_string(8, p); 1013 if (buf == NULL) 1014 break; 1015 x = _PyFloat_Unpack8(buf, 1); 1016 if (x == -1.0 && PyErr_Occurred()) 1017 break; 1018 retval = PyFloat_FromDouble(x); 1019 R_REF(retval); 1020 break; 1021 } 1022 1023 case TYPE_COMPLEX: 1024 { 1025 char buf[256]; 1026 const char *ptr; 1027 Py_complex c; 1028 n = r_byte(p); 1029 if (n == EOF) { 1030 PyErr_SetString(PyExc_EOFError, 1031 "EOF read where object expected"); 1032 break; 1033 } 1034 ptr = r_string(n, p); 1035 if (ptr == NULL) 1036 break; 1037 memcpy(buf, ptr, n); 1038 buf[n] = '\0'; 1039 c.real = PyOS_string_to_double(buf, NULL, NULL); 1040 if (c.real == -1.0 && PyErr_Occurred()) 1041 break; 1042 n = r_byte(p); 1043 if (n == EOF) { 1044 PyErr_SetString(PyExc_EOFError, 1045 "EOF read where object expected"); 1046 break; 1047 } 1048 ptr = r_string(n, p); 1049 if (ptr == NULL) 1050 break; 1051 memcpy(buf, ptr, n); 1052 buf[n] = '\0'; 1053 c.imag = PyOS_string_to_double(buf, NULL, NULL); 1054 if (c.imag == -1.0 && PyErr_Occurred()) 1055 break; 1056 retval = PyComplex_FromCComplex(c); 1057 R_REF(retval); 1058 break; 1059 } 1060 1061 case TYPE_BINARY_COMPLEX: 1062 { 1063 const unsigned char *buf; 1064 Py_complex c; 1065 buf = (const unsigned char *) r_string(8, 
p); 1066 if (buf == NULL) 1067 break; 1068 c.real = _PyFloat_Unpack8(buf, 1); 1069 if (c.real == -1.0 && PyErr_Occurred()) 1070 break; 1071 buf = (const unsigned char *) r_string(8, p); 1072 if (buf == NULL) 1073 break; 1074 c.imag = _PyFloat_Unpack8(buf, 1); 1075 if (c.imag == -1.0 && PyErr_Occurred()) 1076 break; 1077 retval = PyComplex_FromCComplex(c); 1078 R_REF(retval); 1079 break; 1080 } 1081 1082 case TYPE_STRING: 1083 { 1084 const char *ptr; 1085 n = r_long(p); 1086 if (PyErr_Occurred()) 1087 break; 1088 if (n < 0 || n > SIZE32_MAX) { 1089 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); 1090 break; 1091 } 1092 v = PyBytes_FromStringAndSize((char *)NULL, n); 1093 if (v == NULL) 1094 break; 1095 ptr = r_string(n, p); 1096 if (ptr == NULL) { 1097 Py_DECREF(v); 1098 break; 1099 } 1100 memcpy(PyBytes_AS_STRING(v), ptr, n); 1101 retval = v; 1102 R_REF(retval); 1103 break; 1104 } 1105 1106 case TYPE_ASCII_INTERNED: 1107 is_interned = 1; 1108 case TYPE_ASCII: 1109 n = r_long(p); 1110 if (PyErr_Occurred()) 1111 break; 1112 if (n < 0 || n > SIZE32_MAX) { 1113 PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); 1114 break; 1115 } 1116 goto _read_ascii; 1117 1118 case TYPE_SHORT_ASCII_INTERNED: 1119 is_interned = 1; 1120 case TYPE_SHORT_ASCII: 1121 n = r_byte(p); 1122 if (n == EOF) { 1123 PyErr_SetString(PyExc_EOFError, 1124 "EOF read where object expected"); 1125 break; 1126 } 1127 _read_ascii: 1128 { 1129 const char *ptr; 1130 ptr = r_string(n, p); 1131 if (ptr == NULL) 1132 break; 1133 v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n); 1134 if (v == NULL) 1135 break; 1136 if (is_interned) 1137 PyUnicode_InternInPlace(&v); 1138 retval = v; 1139 R_REF(retval); 1140 break; 1141 } 1142 1143 case TYPE_INTERNED: 1144 is_interned = 1; 1145 case TYPE_UNICODE: 1146 { 1147 const char *buffer; 1148 1149 n = r_long(p); 1150 if (PyErr_Occurred()) 1151 break; 1152 if (n < 0 || n > SIZE32_MAX) { 1153 
PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); 1154 break; 1155 } 1156 if (n != 0) { 1157 buffer = r_string(n, p); 1158 if (buffer == NULL) 1159 break; 1160 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass"); 1161 } 1162 else { 1163 v = PyUnicode_New(0, 0); 1164 } 1165 if (v == NULL) 1166 break; 1167 if (is_interned) 1168 PyUnicode_InternInPlace(&v); 1169 retval = v; 1170 R_REF(retval); 1171 break; 1172 } 1173 1174 case TYPE_SMALL_TUPLE: 1175 n = (unsigned char) r_byte(p); 1176 if (PyErr_Occurred()) 1177 break; 1178 goto _read_tuple; 1179 case TYPE_TUPLE: 1180 n = r_long(p); 1181 if (PyErr_Occurred()) 1182 break; 1183 if (n < 0 || n > SIZE32_MAX) { 1184 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); 1185 break; 1186 } 1187 _read_tuple: 1188 v = PyTuple_New(n); 1189 R_REF(v); 1190 if (v == NULL) 1191 break; 1192 1193 for (i = 0; i < n; i++) { 1194 v2 = r_object(p); 1195 if ( v2 == NULL ) { 1196 if (!PyErr_Occurred()) 1197 PyErr_SetString(PyExc_TypeError, 1198 "NULL object in marshal data for tuple"); 1199 Py_DECREF(v); 1200 v = NULL; 1201 break; 1202 } 1203 PyTuple_SET_ITEM(v, i, v2); 1204 } 1205 retval = v; 1206 break; 1207 1208 case TYPE_LIST: 1209 n = r_long(p); 1210 if (PyErr_Occurred()) 1211 break; 1212 if (n < 0 || n > SIZE32_MAX) { 1213 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); 1214 break; 1215 } 1216 v = PyList_New(n); 1217 R_REF(v); 1218 if (v == NULL) 1219 break; 1220 for (i = 0; i < n; i++) { 1221 v2 = r_object(p); 1222 if ( v2 == NULL ) { 1223 if (!PyErr_Occurred()) 1224 PyErr_SetString(PyExc_TypeError, 1225 "NULL object in marshal data for list"); 1226 Py_DECREF(v); 1227 v = NULL; 1228 break; 1229 } 1230 PyList_SET_ITEM(v, i, v2); 1231 } 1232 retval = v; 1233 break; 1234 1235 case TYPE_DICT: 1236 v = PyDict_New(); 1237 R_REF(v); 1238 if (v == NULL) 1239 break; 1240 for (;;) { 1241 PyObject *key, *val; 1242 key = r_object(p); 1243 if (key == 
NULL) 1244 break; 1245 val = r_object(p); 1246 if (val == NULL) { 1247 Py_DECREF(key); 1248 break; 1249 } 1250 if (PyDict_SetItem(v, key, val) < 0) { 1251 Py_DECREF(key); 1252 Py_DECREF(val); 1253 break; 1254 } 1255 Py_DECREF(key); 1256 Py_DECREF(val); 1257 } 1258 if (PyErr_Occurred()) { 1259 Py_DECREF(v); 1260 v = NULL; 1261 } 1262 retval = v; 1263 break; 1264 1265 case TYPE_SET: 1266 case TYPE_FROZENSET: 1267 n = r_long(p); 1268 if (PyErr_Occurred()) 1269 break; 1270 if (n < 0 || n > SIZE32_MAX) { 1271 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); 1272 break; 1273 } 1274 1275 if (n == 0 && type == TYPE_FROZENSET) { 1276 /* call frozenset() to get the empty frozenset singleton */ 1277 v = PyObject_CallFunction((PyObject*)&PyFrozenSet_Type, NULL); 1278 if (v == NULL) 1279 break; 1280 R_REF(v); 1281 retval = v; 1282 } 1283 else { 1284 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL); 1285 if (type == TYPE_SET) { 1286 R_REF(v); 1287 } else { 1288 /* must use delayed registration of frozensets because they must 1289 * be init with a refcount of 1 1290 */ 1291 idx = r_ref_reserve(flag, p); 1292 if (idx < 0) 1293 Py_CLEAR(v); /* signal error */ 1294 } 1295 if (v == NULL) 1296 break; 1297 1298 for (i = 0; i < n; i++) { 1299 v2 = r_object(p); 1300 if ( v2 == NULL ) { 1301 if (!PyErr_Occurred()) 1302 PyErr_SetString(PyExc_TypeError, 1303 "NULL object in marshal data for set"); 1304 Py_DECREF(v); 1305 v = NULL; 1306 break; 1307 } 1308 if (PySet_Add(v, v2) == -1) { 1309 Py_DECREF(v); 1310 Py_DECREF(v2); 1311 v = NULL; 1312 break; 1313 } 1314 Py_DECREF(v2); 1315 } 1316 if (type != TYPE_SET) 1317 v = r_ref_insert(v, idx, flag, p); 1318 retval = v; 1319 } 1320 break; 1321 1322 case TYPE_CODE: 1323 { 1324 int argcount; 1325 int kwonlyargcount; 1326 int nlocals; 1327 int stacksize; 1328 int flags; 1329 PyObject *code = NULL; 1330 PyObject *consts = NULL; 1331 PyObject *names = NULL; 1332 PyObject *varnames = NULL; 1333 PyObject 
*freevars = NULL; 1334 PyObject *cellvars = NULL; 1335 PyObject *filename = NULL; 1336 PyObject *name = NULL; 1337 int firstlineno; 1338 PyObject *lnotab = NULL; 1339 1340 idx = r_ref_reserve(flag, p); 1341 if (idx < 0) 1342 break; 1343 1344 v = NULL; 1345 1346 /* XXX ignore long->int overflows for now */ 1347 argcount = (int)r_long(p); 1348 if (PyErr_Occurred()) 1349 goto code_error; 1350 kwonlyargcount = (int)r_long(p); 1351 if (PyErr_Occurred()) 1352 goto code_error; 1353 nlocals = (int)r_long(p); 1354 if (PyErr_Occurred()) 1355 goto code_error; 1356 stacksize = (int)r_long(p); 1357 if (PyErr_Occurred()) 1358 goto code_error; 1359 flags = (int)r_long(p); 1360 if (PyErr_Occurred()) 1361 goto code_error; 1362 code = r_object(p); 1363 if (code == NULL) 1364 goto code_error; 1365 consts = r_object(p); 1366 if (consts == NULL) 1367 goto code_error; 1368 names = r_object(p); 1369 if (names == NULL) 1370 goto code_error; 1371 varnames = r_object(p); 1372 if (varnames == NULL) 1373 goto code_error; 1374 freevars = r_object(p); 1375 if (freevars == NULL) 1376 goto code_error; 1377 cellvars = r_object(p); 1378 if (cellvars == NULL) 1379 goto code_error; 1380 filename = r_object(p); 1381 if (filename == NULL) 1382 goto code_error; 1383 if (PyUnicode_CheckExact(filename)) { 1384 if (p->current_filename != NULL) { 1385 if (!PyUnicode_Compare(filename, p->current_filename)) { 1386 Py_DECREF(filename); 1387 Py_INCREF(p->current_filename); 1388 filename = p->current_filename; 1389 } 1390 } 1391 else { 1392 p->current_filename = filename; 1393 } 1394 } 1395 name = r_object(p); 1396 if (name == NULL) 1397 goto code_error; 1398 firstlineno = (int)r_long(p); 1399 if (firstlineno == -1 && PyErr_Occurred()) 1400 break; 1401 lnotab = r_object(p); 1402 if (lnotab == NULL) 1403 goto code_error; 1404 1405 v = (PyObject *) PyCode_New( 1406 argcount, kwonlyargcount, 1407 nlocals, stacksize, flags, 1408 code, consts, names, varnames, 1409 freevars, cellvars, filename, name, 1410 
firstlineno, lnotab); 1411 v = r_ref_insert(v, idx, flag, p); 1412 1413 code_error: 1414 Py_XDECREF(code); 1415 Py_XDECREF(consts); 1416 Py_XDECREF(names); 1417 Py_XDECREF(varnames); 1418 Py_XDECREF(freevars); 1419 Py_XDECREF(cellvars); 1420 Py_XDECREF(filename); 1421 Py_XDECREF(name); 1422 Py_XDECREF(lnotab); 1423 } 1424 retval = v; 1425 break; 1426 1427 case TYPE_REF: 1428 n = r_long(p); 1429 if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { 1430 if (n == -1 && PyErr_Occurred()) 1431 break; 1432 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); 1433 break; 1434 } 1435 v = PyList_GET_ITEM(p->refs, n); 1436 if (v == Py_None) { 1437 PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); 1438 break; 1439 } 1440 Py_INCREF(v); 1441 retval = v; 1442 break; 1443 1444 default: 1445 /* Bogus data got written, which isn't ideal. 1446 This will let you keep working and recover. */ 1447 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)"); 1448 break; 1449 1450 } 1451 p->depth--; 1452 return retval; 1453 } 1454 1455 static PyObject * 1456 read_object(RFILE *p) 1457 { 1458 PyObject *v; 1459 if (PyErr_Occurred()) { 1460 fprintf(stderr, "XXX readobject called with exception set\n"); 1461 return NULL; 1462 } 1463 v = r_object(p); 1464 if (v == NULL && !PyErr_Occurred()) 1465 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object"); 1466 return v; 1467 } 1468 1469 int 1470 PyMarshal_ReadShortFromFile(FILE *fp) 1471 { 1472 RFILE rf; 1473 int res; 1474 assert(fp); 1475 rf.readable = NULL; 1476 rf.fp = fp; 1477 rf.current_filename = NULL; 1478 rf.end = rf.ptr = NULL; 1479 rf.buf = NULL; 1480 res = r_short(&rf); 1481 if (rf.buf != NULL) 1482 PyMem_FREE(rf.buf); 1483 return res; 1484 } 1485 1486 long 1487 PyMarshal_ReadLongFromFile(FILE *fp) 1488 { 1489 RFILE rf; 1490 long res; 1491 rf.fp = fp; 1492 rf.readable = NULL; 1493 rf.current_filename = NULL; 1494 rf.ptr = rf.end = NULL; 1495 rf.buf = NULL; 1496 
res = r_long(&rf); 1497 if (rf.buf != NULL) 1498 PyMem_FREE(rf.buf); 1499 return res; 1500 } 1501 1502 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */ 1503 static off_t 1504 getfilesize(FILE *fp) 1505 { 1506 struct _Py_stat_struct st; 1507 if (_Py_fstat_noraise(fileno(fp), &st) != 0) 1508 return -1; 1509 #if SIZEOF_OFF_T == 4 1510 else if (st.st_size >= INT_MAX) 1511 return (off_t)INT_MAX; 1512 #endif 1513 else 1514 return (off_t)st.st_size; 1515 } 1516 1517 /* If we can get the size of the file up-front, and it's reasonably small, 1518 * read it in one gulp and delegate to ...FromString() instead. Much quicker 1519 * than reading a byte at a time from file; speeds .pyc imports. 1520 * CAUTION: since this may read the entire remainder of the file, don't 1521 * call it unless you know you're done with the file. 1522 */ 1523 PyObject * 1524 PyMarshal_ReadLastObjectFromFile(FILE *fp) 1525 { 1526 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */ 1527 #define REASONABLE_FILE_LIMIT (1L << 18) 1528 off_t filesize; 1529 filesize = getfilesize(fp); 1530 if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) { 1531 char* pBuf = (char *)PyMem_MALLOC(filesize); 1532 if (pBuf != NULL) { 1533 size_t n = fread(pBuf, 1, (size_t)filesize, fp); 1534 PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n); 1535 PyMem_FREE(pBuf); 1536 return v; 1537 } 1538 1539 } 1540 /* We don't have fstat, or we do but the file is larger than 1541 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time. 
1542 */ 1543 return PyMarshal_ReadObjectFromFile(fp); 1544 1545 #undef REASONABLE_FILE_LIMIT 1546 } 1547 1548 PyObject * 1549 PyMarshal_ReadObjectFromFile(FILE *fp) 1550 { 1551 RFILE rf; 1552 PyObject *result; 1553 rf.fp = fp; 1554 rf.readable = NULL; 1555 rf.current_filename = NULL; 1556 rf.depth = 0; 1557 rf.ptr = rf.end = NULL; 1558 rf.buf = NULL; 1559 rf.refs = PyList_New(0); 1560 if (rf.refs == NULL) 1561 return NULL; 1562 result = r_object(&rf); 1563 Py_DECREF(rf.refs); 1564 if (rf.buf != NULL) 1565 PyMem_FREE(rf.buf); 1566 return result; 1567 } 1568 1569 PyObject * 1570 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) 1571 { 1572 RFILE rf; 1573 PyObject *result; 1574 rf.fp = NULL; 1575 rf.readable = NULL; 1576 rf.current_filename = NULL; 1577 rf.ptr = (char *)str; 1578 rf.end = (char *)str + len; 1579 rf.buf = NULL; 1580 rf.depth = 0; 1581 rf.refs = PyList_New(0); 1582 if (rf.refs == NULL) 1583 return NULL; 1584 result = r_object(&rf); 1585 Py_DECREF(rf.refs); 1586 if (rf.buf != NULL) 1587 PyMem_FREE(rf.buf); 1588 return result; 1589 } 1590 1591 PyObject * 1592 PyMarshal_WriteObjectToString(PyObject *x, int version) 1593 { 1594 WFILE wf; 1595 1596 memset(&wf, 0, sizeof(wf)); 1597 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50); 1598 if (wf.str == NULL) 1599 return NULL; 1600 wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str); 1601 wf.end = wf.ptr + PyBytes_Size(wf.str); 1602 wf.error = WFERR_OK; 1603 wf.version = version; 1604 if (w_init_refs(&wf, version)) { 1605 Py_DECREF(wf.str); 1606 return NULL; 1607 } 1608 w_object(x, &wf); 1609 w_clear_refs(&wf); 1610 if (wf.str != NULL) { 1611 char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str); 1612 if (wf.ptr - base > PY_SSIZE_T_MAX) { 1613 Py_DECREF(wf.str); 1614 PyErr_SetString(PyExc_OverflowError, 1615 "too much marshal data for a string"); 1616 return NULL; 1617 } 1618 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0) 1619 return NULL; 1620 } 1621 if (wf.error != 
WFERR_OK) { 1622 Py_XDECREF(wf.str); 1623 if (wf.error == WFERR_NOMEMORY) 1624 PyErr_NoMemory(); 1625 else 1626 PyErr_SetString(PyExc_ValueError, 1627 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object" 1628 :"object too deeply nested to marshal"); 1629 return NULL; 1630 } 1631 return wf.str; 1632 } 1633 1634 /* And an interface for Python programs... */ 1635 1636 static PyObject * 1637 marshal_dump(PyObject *self, PyObject *args) 1638 { 1639 /* XXX Quick hack -- need to do this differently */ 1640 PyObject *x; 1641 PyObject *f; 1642 int version = Py_MARSHAL_VERSION; 1643 PyObject *s; 1644 PyObject *res; 1645 _Py_IDENTIFIER(write); 1646 1647 if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version)) 1648 return NULL; 1649 s = PyMarshal_WriteObjectToString(x, version); 1650 if (s == NULL) 1651 return NULL; 1652 res = _PyObject_CallMethodId(f, &PyId_write, "O", s); 1653 Py_DECREF(s); 1654 return res; 1655 } 1656 1657 PyDoc_STRVAR(dump_doc, 1658 "dump(value, file[, version])\n\ 1659 \n\ 1660 Write the value on the open file. The value must be a supported type.\n\ 1661 The file must be an open file object such as sys.stdout or returned by\n\ 1662 open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\ 1663 \n\ 1664 If the value has (or contains an object that has) an unsupported type, a\n\ 1665 ValueError exception is raised - but garbage data will also be written\n\ 1666 to the file. The object will not be properly read back by load()\n\ 1667 \n\ 1668 The version argument indicates the data format that dump should use."); 1669 1670 static PyObject * 1671 marshal_load(PyObject *self, PyObject *f) 1672 { 1673 PyObject *data, *result; 1674 _Py_IDENTIFIER(read); 1675 RFILE rf; 1676 1677 /* 1678 * Make a call to the read method, but read zero bytes. 1679 * This is to ensure that the object passed in at least 1680 * has a read method which returns bytes. 
1681 * This can be removed if we guarantee good error handling 1682 * for r_string() 1683 */ 1684 data = _PyObject_CallMethodId(f, &PyId_read, "i", 0); 1685 if (data == NULL) 1686 return NULL; 1687 if (!PyBytes_Check(data)) { 1688 PyErr_Format(PyExc_TypeError, 1689 "f.read() returned not bytes but %.100s", 1690 data->ob_type->tp_name); 1691 result = NULL; 1692 } 1693 else { 1694 rf.depth = 0; 1695 rf.fp = NULL; 1696 rf.readable = f; 1697 rf.current_filename = NULL; 1698 rf.ptr = rf.end = NULL; 1699 rf.buf = NULL; 1700 if ((rf.refs = PyList_New(0)) != NULL) { 1701 result = read_object(&rf); 1702 Py_DECREF(rf.refs); 1703 if (rf.buf != NULL) 1704 PyMem_FREE(rf.buf); 1705 } else 1706 result = NULL; 1707 } 1708 Py_DECREF(data); 1709 return result; 1710 } 1711 1712 PyDoc_STRVAR(load_doc, 1713 "load(file)\n\ 1714 \n\ 1715 Read one value from the open file and return it. If no valid value is\n\ 1716 read (e.g. because the data has a different Python version's\n\ 1717 incompatible marshal format), raise EOFError, ValueError or TypeError.\n\ 1718 The file must be an open file object opened in binary mode ('rb' or\n\ 1719 'r+b').\n\ 1720 \n\ 1721 Note: If an object containing an unsupported type was marshalled with\n\ 1722 dump(), load() will substitute None for the unmarshallable type."); 1723 1724 1725 static PyObject * 1726 marshal_dumps(PyObject *self, PyObject *args) 1727 { 1728 PyObject *x; 1729 int version = Py_MARSHAL_VERSION; 1730 if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version)) 1731 return NULL; 1732 return PyMarshal_WriteObjectToString(x, version); 1733 } 1734 1735 PyDoc_STRVAR(dumps_doc, 1736 "dumps(value[, version])\n\ 1737 \n\ 1738 Return the string that would be written to a file by dump(value, file).\n\ 1739 The value must be a supported type. 
Raise a ValueError exception if\n\ 1740 value has (or contains an object that has) an unsupported type.\n\ 1741 \n\ 1742 The version argument indicates the data format that dumps should use."); 1743 1744 1745 static PyObject * 1746 marshal_loads(PyObject *self, PyObject *args) 1747 { 1748 RFILE rf; 1749 Py_buffer p; 1750 char *s; 1751 Py_ssize_t n; 1752 PyObject* result; 1753 if (!PyArg_ParseTuple(args, "y*:loads", &p)) 1754 return NULL; 1755 s = p.buf; 1756 n = p.len; 1757 rf.fp = NULL; 1758 rf.readable = NULL; 1759 rf.current_filename = NULL; 1760 rf.ptr = s; 1761 rf.end = s + n; 1762 rf.depth = 0; 1763 if ((rf.refs = PyList_New(0)) == NULL) 1764 return NULL; 1765 result = read_object(&rf); 1766 PyBuffer_Release(&p); 1767 Py_DECREF(rf.refs); 1768 return result; 1769 } 1770 1771 PyDoc_STRVAR(loads_doc, 1772 "loads(bytes)\n\ 1773 \n\ 1774 Convert the bytes object to a value. If no valid value is found, raise\n\ 1775 EOFError, ValueError or TypeError. Extra characters in the input are\n\ 1776 ignored."); 1777 1778 static PyMethodDef marshal_methods[] = { 1779 {"dump", marshal_dump, METH_VARARGS, dump_doc}, 1780 {"load", marshal_load, METH_O, load_doc}, 1781 {"dumps", marshal_dumps, METH_VARARGS, dumps_doc}, 1782 {"loads", marshal_loads, METH_VARARGS, loads_doc}, 1783 {NULL, NULL} /* sentinel */ 1784 }; 1785 1786 1787 PyDoc_STRVAR(module_doc, 1788 "This module contains functions that can read and write Python values in\n\ 1789 a binary format. The format is specific to Python, but independent of\n\ 1790 machine architecture issues.\n\ 1791 \n\ 1792 Not all Python object types are supported; in general, only objects\n\ 1793 whose value is independent from a particular invocation of Python can be\n\ 1794 written and read by this module. 
The following types are supported:\n\ 1795 None, integers, floating point numbers, strings, bytes, bytearrays,\n\ 1796 tuples, lists, sets, dictionaries, and code objects, where it\n\ 1797 should be understood that tuples, lists and dictionaries are only\n\ 1798 supported as long as the values contained therein are themselves\n\ 1799 supported; and recursive lists and dictionaries should not be written\n\ 1800 (they will cause infinite loops).\n\ 1801 \n\ 1802 Variables:\n\ 1803 \n\ 1804 version -- indicates the format that the module uses. Version 0 is the\n\ 1805 historical format, version 1 shares interned strings and version 2\n\ 1806 uses a binary format for floating point numbers.\n\ 1807 Version 3 shares common object references (New in version 3.4).\n\ 1808 \n\ 1809 Functions:\n\ 1810 \n\ 1811 dump() -- write value to a file\n\ 1812 load() -- read value from a file\n\ 1813 dumps() -- write value to a string\n\ 1814 loads() -- read value from a string"); 1815 1816 1817 1818 static struct PyModuleDef marshalmodule = { 1819 PyModuleDef_HEAD_INIT, 1820 "marshal", 1821 module_doc, 1822 0, 1823 marshal_methods, 1824 NULL, 1825 NULL, 1826 NULL, 1827 NULL 1828 }; 1829 1830 PyMODINIT_FUNC 1831 PyMarshal_Init(void) 1832 { 1833 PyObject *mod = PyModule_Create(&marshalmodule); 1834 if (mod == NULL) 1835 return NULL; 1836 PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION); 1837 return mod; 1838 } 1839