Home | History | Annotate | Download | only in Python
      1 
      2 /* Write Python objects to files and read them back.
      3    This is primarily intended for writing and reading compiled Python code,
      4    even though dicts, lists, sets and frozensets, not commonly seen in
      5    code objects, are supported.
      6    Version 3 of this protocol properly supports circular links
      7    and sharing. */
      8 
      9 #define PY_SSIZE_T_CLEAN
     10 
     11 #include "Python.h"
     12 #include "longintrepr.h"
     13 #include "code.h"
     14 #include "marshal.h"
     15 #include "../Modules/hashtable.h"
     16 
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring (crashing) the interpreter.  Both the writer (w_object)
 * and the reader (r_object) track their nesting depth and report an error
 * instead of continuing once the depth exceeds this value.
 * On Windows debug builds, reduce this value.
 */
#if defined(MS_WINDOWS) && defined(_DEBUG)
#define MAX_MARSHAL_STACK_DEPTH 1000
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
     27 
/* One-byte type codes.  The writer emits one of these before the payload of
   each value, and the reader switches on it. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_REF                'r'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
/* High bit OR-ed into a type code; the reader masks it off again with
   "code & ~FLAG_REF" to recover the plain type. */
#define FLAG_REF                '\x80' /* with a type, add obj to index */

/* Compact codes that the writer only emits when the version is >= 4. */
#define TYPE_ASCII              'a'
#define TYPE_ASCII_INTERNED     'A'
#define TYPE_SMALL_TUPLE        ')'
#define TYPE_SHORT_ASCII        'z'
#define TYPE_SHORT_ASCII_INTERNED 'Z'

/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
     63 
/* State of one marshal write operation.  Output goes either to a FILE
   (fp != NULL, with buf..end being a caller-supplied staging buffer) or to a
   growing bytes object (fp == NULL, str != NULL). */
typedef struct {
    FILE *fp;       /* destination file, or NULL when writing to 'str' */
    int error;  /* see WFERR_* values */
    int depth;      /* current nesting depth, maintained by w_object() */
    PyObject *str;  /* bytes object backing the buffer in memory mode */
    char *ptr;      /* next byte to write; NULL after a failed resize */
    char *end;      /* one past the last usable byte of the buffer */
    char *buf;      /* start of the buffer */
    _Py_hashtable_t *hashtable; /* object pointer -> int reference index;
                                   only created for version >= 3 */
    int version;    /* marshal format version requested by the caller */
} WFILE;
     75 
/* Append the single byte 'c' to writer 'p'.  When the buffer is full,
   w_reserve() is asked for one more byte of room; if that fails the byte is
   dropped silently.  Note that 'c' is evaluated only when room is available,
   and 'p' is evaluated more than once. */
#define w_byte(c, p) do {                               \
        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
            *(p)->ptr++ = (c);                          \
    } while(0)
     80 
     81 static void
     82 w_flush(WFILE *p)
     83 {
     84     assert(p->fp != NULL);
     85     fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
     86     p->ptr = p->buf;
     87 }
     88 
     89 static int
     90 w_reserve(WFILE *p, Py_ssize_t needed)
     91 {
     92     Py_ssize_t pos, size, delta;
     93     if (p->ptr == NULL)
     94         return 0; /* An error already occurred */
     95     if (p->fp != NULL) {
     96         w_flush(p);
     97         return needed <= p->end - p->ptr;
     98     }
     99     assert(p->str != NULL);
    100     pos = p->ptr - p->buf;
    101     size = PyBytes_Size(p->str);
    102     if (size > 16*1024*1024)
    103         delta = (size >> 3);            /* 12.5% overallocation */
    104     else
    105         delta = size + 1024;
    106     delta = Py_MAX(delta, needed);
    107     if (delta > PY_SSIZE_T_MAX - size) {
    108         p->error = WFERR_NOMEMORY;
    109         return 0;
    110     }
    111     size += delta;
    112     if (_PyBytes_Resize(&p->str, size) != 0) {
    113         p->ptr = p->buf = p->end = NULL;
    114         return 0;
    115     }
    116     else {
    117         p->buf = PyBytes_AS_STRING(p->str);
    118         p->ptr = p->buf + pos;
    119         p->end = p->buf + size;
    120         return 1;
    121     }
    122 }
    123 
    124 static void
    125 w_string(const char *s, Py_ssize_t n, WFILE *p)
    126 {
    127     Py_ssize_t m;
    128     if (!n || p->ptr == NULL)
    129         return;
    130     m = p->end - p->ptr;
    131     if (p->fp != NULL) {
    132         if (n <= m) {
    133             memcpy(p->ptr, s, n);
    134             p->ptr += n;
    135         }
    136         else {
    137             w_flush(p);
    138             fwrite(s, 1, n, p->fp);
    139         }
    140     }
    141     else {
    142         if (n <= m || w_reserve(p, n - m)) {
    143             memcpy(p->ptr, s, n);
    144             p->ptr += n;
    145         }
    146     }
    147 }
    148 
    149 static void
    150 w_short(int x, WFILE *p)
    151 {
    152     w_byte((char)( x      & 0xff), p);
    153     w_byte((char)((x>> 8) & 0xff), p);
    154 }
    155 
    156 static void
    157 w_long(long x, WFILE *p)
    158 {
    159     w_byte((char)( x      & 0xff), p);
    160     w_byte((char)((x>> 8) & 0xff), p);
    161     w_byte((char)((x>>16) & 0xff), p);
    162     w_byte((char)((x>>24) & 0xff), p);
    163 }
    164 
    165 #define SIZE32_MAX  0x7FFFFFFF
    166 
    167 #if SIZEOF_SIZE_T > 4
    168 # define W_SIZE(n, p)  do {                     \
    169         if ((n) > SIZE32_MAX) {                 \
    170             (p)->depth--;                       \
    171             (p)->error = WFERR_UNMARSHALLABLE;  \
    172             return;                             \
    173         }                                       \
    174         w_long((long)(n), p);                   \
    175     } while(0)
    176 #else
    177 # define W_SIZE  w_long
    178 #endif
    179 
    180 static void
    181 w_pstring(const char *s, Py_ssize_t n, WFILE *p)
    182 {
    183         W_SIZE(n, p);
    184         w_string(s, n, p);
    185 }
    186 
    187 static void
    188 w_short_pstring(const char *s, Py_ssize_t n, WFILE *p)
    189 {
    190     w_byte(Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char), p);
    191     w_string(s, n, p);
    192 }
    193 
/* We assume that Python ints are stored internally in a base that is some
   power of 2**15; for the sake of portability we always read and write them
   in base exactly 2**15.  PyLong_MARSHAL_RATIO is the number of 15-bit
   marshal digits that make up one internal digit. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)

/* Write type code 't' to writer 'p', OR-ed with a variable named 'flag'
   that must be in scope at the point of use. */
#define W_TYPE(t, p) do { \
    w_byte((t) | flag, (p)); \
} while(0)
    209 
    210 static void
    211 w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
    212 {
    213     Py_ssize_t i, j, n, l;
    214     digit d;
    215 
    216     W_TYPE(TYPE_LONG, p);
    217     if (Py_SIZE(ob) == 0) {
    218         w_long((long)0, p);
    219         return;
    220     }
    221 
    222     /* set l to number of base PyLong_MARSHAL_BASE digits */
    223     n = Py_ABS(Py_SIZE(ob));
    224     l = (n-1) * PyLong_MARSHAL_RATIO;
    225     d = ob->ob_digit[n-1];
    226     assert(d != 0); /* a PyLong is always normalized */
    227     do {
    228         d >>= PyLong_MARSHAL_SHIFT;
    229         l++;
    230     } while (d != 0);
    231     if (l > SIZE32_MAX) {
    232         p->depth--;
    233         p->error = WFERR_UNMARSHALLABLE;
    234         return;
    235     }
    236     w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
    237 
    238     for (i=0; i < n-1; i++) {
    239         d = ob->ob_digit[i];
    240         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
    241             w_short(d & PyLong_MARSHAL_MASK, p);
    242             d >>= PyLong_MARSHAL_SHIFT;
    243         }
    244         assert (d == 0);
    245     }
    246     d = ob->ob_digit[n-1];
    247     do {
    248         w_short(d & PyLong_MARSHAL_MASK, p);
    249         d >>= PyLong_MARSHAL_SHIFT;
    250     } while (d != 0);
    251 }
    252 
    253 static int
    254 w_ref(PyObject *v, char *flag, WFILE *p)
    255 {
    256     _Py_hashtable_entry_t *entry;
    257     int w;
    258 
    259     if (p->version < 3 || p->hashtable == NULL)
    260         return 0; /* not writing object references */
    261 
    262     /* if it has only one reference, it definitely isn't shared */
    263     if (Py_REFCNT(v) == 1)
    264         return 0;
    265 
    266     entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
    267     if (entry != NULL) {
    268         /* write the reference index to the stream */
    269         _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
    270         /* we don't store "long" indices in the dict */
    271         assert(0 <= w && w <= 0x7fffffff);
    272         w_byte(TYPE_REF, p);
    273         w_long(w, p);
    274         return 1;
    275     } else {
    276         size_t s = p->hashtable->entries;
    277         /* we don't support long indices */
    278         if (s >= 0x7fffffff) {
    279             PyErr_SetString(PyExc_ValueError, "too many objects");
    280             goto err;
    281         }
    282         w = (int)s;
    283         Py_INCREF(v);
    284         if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
    285             Py_DECREF(v);
    286             goto err;
    287         }
    288         *flag |= FLAG_REF;
    289         return 0;
    290     }
    291 err:
    292     p->error = WFERR_UNMARSHALLABLE;
    293     return 1;
    294 }
    295 
    296 static void
    297 w_complex_object(PyObject *v, char flag, WFILE *p);
    298 
    299 static void
    300 w_object(PyObject *v, WFILE *p)
    301 {
    302     char flag = '\0';
    303 
    304     p->depth++;
    305 
    306     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
    307         p->error = WFERR_NESTEDTOODEEP;
    308     }
    309     else if (v == NULL) {
    310         w_byte(TYPE_NULL, p);
    311     }
    312     else if (v == Py_None) {
    313         w_byte(TYPE_NONE, p);
    314     }
    315     else if (v == PyExc_StopIteration) {
    316         w_byte(TYPE_STOPITER, p);
    317     }
    318     else if (v == Py_Ellipsis) {
    319         w_byte(TYPE_ELLIPSIS, p);
    320     }
    321     else if (v == Py_False) {
    322         w_byte(TYPE_FALSE, p);
    323     }
    324     else if (v == Py_True) {
    325         w_byte(TYPE_TRUE, p);
    326     }
    327     else if (!w_ref(v, &flag, p))
    328         w_complex_object(v, flag, p);
    329 
    330     p->depth--;
    331 }
    332 
    333 static void
    334 w_complex_object(PyObject *v, char flag, WFILE *p)
    335 {
    336     Py_ssize_t i, n;
    337 
    338     if (PyLong_CheckExact(v)) {
    339         long x = PyLong_AsLong(v);
    340         if ((x == -1)  && PyErr_Occurred()) {
    341             PyLongObject *ob = (PyLongObject *)v;
    342             PyErr_Clear();
    343             w_PyLong(ob, flag, p);
    344         }
    345         else {
    346 #if SIZEOF_LONG > 4
    347             long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
    348             if (y && y != -1) {
    349                 /* Too large for TYPE_INT */
    350                 w_PyLong((PyLongObject*)v, flag, p);
    351             }
    352             else
    353 #endif
    354             {
    355                 W_TYPE(TYPE_INT, p);
    356                 w_long(x, p);
    357             }
    358         }
    359     }
    360     else if (PyFloat_CheckExact(v)) {
    361         if (p->version > 1) {
    362             unsigned char buf[8];
    363             if (_PyFloat_Pack8(PyFloat_AsDouble(v),
    364                                buf, 1) < 0) {
    365                 p->error = WFERR_UNMARSHALLABLE;
    366                 return;
    367             }
    368             W_TYPE(TYPE_BINARY_FLOAT, p);
    369             w_string((char*)buf, 8, p);
    370         }
    371         else {
    372             char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
    373                                               'g', 17, 0, NULL);
    374             if (!buf) {
    375                 p->error = WFERR_NOMEMORY;
    376                 return;
    377             }
    378             n = strlen(buf);
    379             W_TYPE(TYPE_FLOAT, p);
    380             w_byte((int)n, p);
    381             w_string(buf, n, p);
    382             PyMem_Free(buf);
    383         }
    384     }
    385     else if (PyComplex_CheckExact(v)) {
    386         if (p->version > 1) {
    387             unsigned char buf[8];
    388             if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
    389                                buf, 1) < 0) {
    390                 p->error = WFERR_UNMARSHALLABLE;
    391                 return;
    392             }
    393             W_TYPE(TYPE_BINARY_COMPLEX, p);
    394             w_string((char*)buf, 8, p);
    395             if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
    396                                buf, 1) < 0) {
    397                 p->error = WFERR_UNMARSHALLABLE;
    398                 return;
    399             }
    400             w_string((char*)buf, 8, p);
    401         }
    402         else {
    403             char *buf;
    404             W_TYPE(TYPE_COMPLEX, p);
    405             buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
    406                                         'g', 17, 0, NULL);
    407             if (!buf) {
    408                 p->error = WFERR_NOMEMORY;
    409                 return;
    410             }
    411             n = strlen(buf);
    412             w_byte((int)n, p);
    413             w_string(buf, n, p);
    414             PyMem_Free(buf);
    415             buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
    416                                         'g', 17, 0, NULL);
    417             if (!buf) {
    418                 p->error = WFERR_NOMEMORY;
    419                 return;
    420             }
    421             n = strlen(buf);
    422             w_byte((int)n, p);
    423             w_string(buf, n, p);
    424             PyMem_Free(buf);
    425         }
    426     }
    427     else if (PyBytes_CheckExact(v)) {
    428         W_TYPE(TYPE_STRING, p);
    429         w_pstring(PyBytes_AS_STRING(v), PyBytes_GET_SIZE(v), p);
    430     }
    431     else if (PyUnicode_CheckExact(v)) {
    432         if (p->version >= 4 && PyUnicode_IS_ASCII(v)) {
    433             int is_short = PyUnicode_GET_LENGTH(v) < 256;
    434             if (is_short) {
    435                 if (PyUnicode_CHECK_INTERNED(v))
    436                     W_TYPE(TYPE_SHORT_ASCII_INTERNED, p);
    437                 else
    438                     W_TYPE(TYPE_SHORT_ASCII, p);
    439                 w_short_pstring((char *) PyUnicode_1BYTE_DATA(v),
    440                                 PyUnicode_GET_LENGTH(v), p);
    441             }
    442             else {
    443                 if (PyUnicode_CHECK_INTERNED(v))
    444                     W_TYPE(TYPE_ASCII_INTERNED, p);
    445                 else
    446                     W_TYPE(TYPE_ASCII, p);
    447                 w_pstring((char *) PyUnicode_1BYTE_DATA(v),
    448                           PyUnicode_GET_LENGTH(v), p);
    449             }
    450         }
    451         else {
    452             PyObject *utf8;
    453             utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
    454             if (utf8 == NULL) {
    455                 p->depth--;
    456                 p->error = WFERR_UNMARSHALLABLE;
    457                 return;
    458             }
    459             if (p->version >= 3 &&  PyUnicode_CHECK_INTERNED(v))
    460                 W_TYPE(TYPE_INTERNED, p);
    461             else
    462                 W_TYPE(TYPE_UNICODE, p);
    463             w_pstring(PyBytes_AS_STRING(utf8), PyBytes_GET_SIZE(utf8), p);
    464             Py_DECREF(utf8);
    465         }
    466     }
    467     else if (PyTuple_CheckExact(v)) {
    468         n = PyTuple_Size(v);
    469         if (p->version >= 4 && n < 256) {
    470             W_TYPE(TYPE_SMALL_TUPLE, p);
    471             w_byte((unsigned char)n, p);
    472         }
    473         else {
    474             W_TYPE(TYPE_TUPLE, p);
    475             W_SIZE(n, p);
    476         }
    477         for (i = 0; i < n; i++) {
    478             w_object(PyTuple_GET_ITEM(v, i), p);
    479         }
    480     }
    481     else if (PyList_CheckExact(v)) {
    482         W_TYPE(TYPE_LIST, p);
    483         n = PyList_GET_SIZE(v);
    484         W_SIZE(n, p);
    485         for (i = 0; i < n; i++) {
    486             w_object(PyList_GET_ITEM(v, i), p);
    487         }
    488     }
    489     else if (PyDict_CheckExact(v)) {
    490         Py_ssize_t pos;
    491         PyObject *key, *value;
    492         W_TYPE(TYPE_DICT, p);
    493         /* This one is NULL object terminated! */
    494         pos = 0;
    495         while (PyDict_Next(v, &pos, &key, &value)) {
    496             w_object(key, p);
    497             w_object(value, p);
    498         }
    499         w_object((PyObject *)NULL, p);
    500     }
    501     else if (PyAnySet_CheckExact(v)) {
    502         PyObject *value, *it;
    503 
    504         if (PyObject_TypeCheck(v, &PySet_Type))
    505             W_TYPE(TYPE_SET, p);
    506         else
    507             W_TYPE(TYPE_FROZENSET, p);
    508         n = PyObject_Size(v);
    509         if (n == -1) {
    510             p->depth--;
    511             p->error = WFERR_UNMARSHALLABLE;
    512             return;
    513         }
    514         W_SIZE(n, p);
    515         it = PyObject_GetIter(v);
    516         if (it == NULL) {
    517             p->depth--;
    518             p->error = WFERR_UNMARSHALLABLE;
    519             return;
    520         }
    521         while ((value = PyIter_Next(it)) != NULL) {
    522             w_object(value, p);
    523             Py_DECREF(value);
    524         }
    525         Py_DECREF(it);
    526         if (PyErr_Occurred()) {
    527             p->depth--;
    528             p->error = WFERR_UNMARSHALLABLE;
    529             return;
    530         }
    531     }
    532     else if (PyCode_Check(v)) {
    533         PyCodeObject *co = (PyCodeObject *)v;
    534         W_TYPE(TYPE_CODE, p);
    535         w_long(co->co_argcount, p);
    536         w_long(co->co_kwonlyargcount, p);
    537         w_long(co->co_nlocals, p);
    538         w_long(co->co_stacksize, p);
    539         w_long(co->co_flags, p);
    540         w_object(co->co_code, p);
    541         w_object(co->co_consts, p);
    542         w_object(co->co_names, p);
    543         w_object(co->co_varnames, p);
    544         w_object(co->co_freevars, p);
    545         w_object(co->co_cellvars, p);
    546         w_object(co->co_filename, p);
    547         w_object(co->co_name, p);
    548         w_long(co->co_firstlineno, p);
    549         w_object(co->co_lnotab, p);
    550     }
    551     else if (PyObject_CheckBuffer(v)) {
    552         /* Write unknown bytes-like objects as a byte string */
    553         Py_buffer view;
    554         if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) != 0) {
    555             w_byte(TYPE_UNKNOWN, p);
    556             p->depth--;
    557             p->error = WFERR_UNMARSHALLABLE;
    558             return;
    559         }
    560         W_TYPE(TYPE_STRING, p);
    561         w_pstring(view.buf, view.len, p);
    562         PyBuffer_Release(&view);
    563     }
    564     else {
    565         W_TYPE(TYPE_UNKNOWN, p);
    566         p->error = WFERR_UNMARSHALLABLE;
    567     }
    568 }
    569 
    570 static int
    571 w_init_refs(WFILE *wf, int version)
    572 {
    573     if (version >= 3) {
    574         wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
    575                                           _Py_hashtable_hash_ptr,
    576                                           _Py_hashtable_compare_direct);
    577         if (wf->hashtable == NULL) {
    578             PyErr_NoMemory();
    579             return -1;
    580         }
    581     }
    582     return 0;
    583 }
    584 
    585 static int
    586 w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
    587                void *Py_UNUSED(data))
    588 {
    589     PyObject *entry_key;
    590 
    591     _Py_HASHTABLE_ENTRY_READ_KEY(ht, entry, entry_key);
    592     Py_XDECREF(entry_key);
    593     return 0;
    594 }
    595 
    596 static void
    597 w_clear_refs(WFILE *wf)
    598 {
    599     if (wf->hashtable != NULL) {
    600         _Py_hashtable_foreach(wf->hashtable, w_decref_entry, NULL);
    601         _Py_hashtable_destroy(wf->hashtable);
    602     }
    603 }
    604 
    605 /* version currently has no effect for writing ints. */
    606 void
    607 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
    608 {
    609     char buf[4];
    610     WFILE wf;
    611     memset(&wf, 0, sizeof(wf));
    612     wf.fp = fp;
    613     wf.ptr = wf.buf = buf;
    614     wf.end = wf.ptr + sizeof(buf);
    615     wf.error = WFERR_OK;
    616     wf.version = version;
    617     w_long(x, &wf);
    618     w_flush(&wf);
    619 }
    620 
    621 void
    622 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
    623 {
    624     char buf[BUFSIZ];
    625     WFILE wf;
    626     memset(&wf, 0, sizeof(wf));
    627     wf.fp = fp;
    628     wf.ptr = wf.buf = buf;
    629     wf.end = wf.ptr + sizeof(buf);
    630     wf.error = WFERR_OK;
    631     wf.version = version;
    632     if (w_init_refs(&wf, version))
    633         return; /* caller mush check PyErr_Occurred() */
    634     w_object(x, &wf);
    635     w_clear_refs(&wf);
    636     w_flush(&wf);
    637 }
    638 
/* State of one marshal read operation.  Input comes from memory
   (ptr != NULL), from a FILE (fp, when readable is NULL), or from a Python
   object with a readinto() method (readable). */
typedef struct {
    FILE *fp;       /* source file, used when ptr and readable are both NULL */
    int depth;      /* current nesting depth, maintained by r_object() */
    PyObject *readable;  /* Stream-like object being read from */
    PyObject *current_filename; /* not referenced in this part of the file */
    char *ptr;      /* in-memory source: next unread byte, or NULL */
    char *end;      /* in-memory source: one past the last byte */
    char *buf;      /* scratch buffer filled by r_string() for fp/readable */
    Py_ssize_t buf_size; /* allocated size of 'buf' */
    PyObject *refs;  /* a list */
} RFILE;
    650 
    651 static const char *
    652 r_string(Py_ssize_t n, RFILE *p)
    653 {
    654     Py_ssize_t read = -1;
    655 
    656     if (p->ptr != NULL) {
    657         /* Fast path for loads() */
    658         char *res = p->ptr;
    659         Py_ssize_t left = p->end - p->ptr;
    660         if (left < n) {
    661             PyErr_SetString(PyExc_EOFError,
    662                             "marshal data too short");
    663             return NULL;
    664         }
    665         p->ptr += n;
    666         return res;
    667     }
    668     if (p->buf == NULL) {
    669         p->buf = PyMem_MALLOC(n);
    670         if (p->buf == NULL) {
    671             PyErr_NoMemory();
    672             return NULL;
    673         }
    674         p->buf_size = n;
    675     }
    676     else if (p->buf_size < n) {
    677         p->buf = PyMem_REALLOC(p->buf, n);
    678         if (p->buf == NULL) {
    679             PyErr_NoMemory();
    680             return NULL;
    681         }
    682         p->buf_size = n;
    683     }
    684 
    685     if (!p->readable) {
    686         assert(p->fp != NULL);
    687         read = fread(p->buf, 1, n, p->fp);
    688     }
    689     else {
    690         _Py_IDENTIFIER(readinto);
    691         PyObject *res, *mview;
    692         Py_buffer buf;
    693 
    694         if (PyBuffer_FillInfo(&buf, NULL, p->buf, n, 0, PyBUF_CONTIG) == -1)
    695             return NULL;
    696         mview = PyMemoryView_FromBuffer(&buf);
    697         if (mview == NULL)
    698             return NULL;
    699 
    700         res = _PyObject_CallMethodId(p->readable, &PyId_readinto, "N", mview);
    701         if (res != NULL) {
    702             read = PyNumber_AsSsize_t(res, PyExc_ValueError);
    703             Py_DECREF(res);
    704         }
    705     }
    706     if (read != n) {
    707         if (!PyErr_Occurred()) {
    708             if (read > n)
    709                 PyErr_Format(PyExc_ValueError,
    710                              "read() returned too much data: "
    711                              "%zd bytes requested, %zd returned",
    712                              n, read);
    713             else
    714                 PyErr_SetString(PyExc_EOFError,
    715                                 "EOF read where not expected");
    716         }
    717         return NULL;
    718     }
    719     return p->buf;
    720 }
    721 
    722 static int
    723 r_byte(RFILE *p)
    724 {
    725     int c = EOF;
    726 
    727     if (p->ptr != NULL) {
    728         if (p->ptr < p->end)
    729             c = (unsigned char) *p->ptr++;
    730         return c;
    731     }
    732     if (!p->readable) {
    733         assert(p->fp);
    734         c = getc(p->fp);
    735     }
    736     else {
    737         const char *ptr = r_string(1, p);
    738         if (ptr != NULL)
    739             c = *(unsigned char *) ptr;
    740     }
    741     return c;
    742 }
    743 
    744 static int
    745 r_short(RFILE *p)
    746 {
    747     short x = -1;
    748     const unsigned char *buffer;
    749 
    750     buffer = (const unsigned char *) r_string(2, p);
    751     if (buffer != NULL) {
    752         x = buffer[0];
    753         x |= buffer[1] << 8;
    754         /* Sign-extension, in case short greater than 16 bits */
    755         x |= -(x & 0x8000);
    756     }
    757     return x;
    758 }
    759 
    760 static long
    761 r_long(RFILE *p)
    762 {
    763     long x = -1;
    764     const unsigned char *buffer;
    765 
    766     buffer = (const unsigned char *) r_string(4, p);
    767     if (buffer != NULL) {
    768         x = buffer[0];
    769         x |= (long)buffer[1] << 8;
    770         x |= (long)buffer[2] << 16;
    771         x |= (long)buffer[3] << 24;
    772 #if SIZEOF_LONG > 4
    773         /* Sign extension for 64-bit machines */
    774         x |= -(x & 0x80000000L);
    775 #endif
    776     }
    777     return x;
    778 }
    779 
    780 static PyObject *
    781 r_PyLong(RFILE *p)
    782 {
    783     PyLongObject *ob;
    784     long n, size, i;
    785     int j, md, shorts_in_top_digit;
    786     digit d;
    787 
    788     n = r_long(p);
    789     if (PyErr_Occurred())
    790         return NULL;
    791     if (n == 0)
    792         return (PyObject *)_PyLong_New(0);
    793     if (n < -SIZE32_MAX || n > SIZE32_MAX) {
    794         PyErr_SetString(PyExc_ValueError,
    795                        "bad marshal data (long size out of range)");
    796         return NULL;
    797     }
    798 
    799     size = 1 + (Py_ABS(n) - 1) / PyLong_MARSHAL_RATIO;
    800     shorts_in_top_digit = 1 + (Py_ABS(n) - 1) % PyLong_MARSHAL_RATIO;
    801     ob = _PyLong_New(size);
    802     if (ob == NULL)
    803         return NULL;
    804 
    805     Py_SIZE(ob) = n > 0 ? size : -size;
    806 
    807     for (i = 0; i < size-1; i++) {
    808         d = 0;
    809         for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
    810             md = r_short(p);
    811             if (PyErr_Occurred()) {
    812                 Py_DECREF(ob);
    813                 return NULL;
    814             }
    815             if (md < 0 || md > PyLong_MARSHAL_BASE)
    816                 goto bad_digit;
    817             d += (digit)md << j*PyLong_MARSHAL_SHIFT;
    818         }
    819         ob->ob_digit[i] = d;
    820     }
    821 
    822     d = 0;
    823     for (j=0; j < shorts_in_top_digit; j++) {
    824         md = r_short(p);
    825         if (PyErr_Occurred()) {
    826             Py_DECREF(ob);
    827             return NULL;
    828         }
    829         if (md < 0 || md > PyLong_MARSHAL_BASE)
    830             goto bad_digit;
    831         /* topmost marshal digit should be nonzero */
    832         if (md == 0 && j == shorts_in_top_digit - 1) {
    833             Py_DECREF(ob);
    834             PyErr_SetString(PyExc_ValueError,
    835                 "bad marshal data (unnormalized long data)");
    836             return NULL;
    837         }
    838         d += (digit)md << j*PyLong_MARSHAL_SHIFT;
    839     }
    840     if (PyErr_Occurred()) {
    841         Py_DECREF(ob);
    842         return NULL;
    843     }
    844     /* top digit should be nonzero, else the resulting PyLong won't be
    845        normalized */
    846     ob->ob_digit[size-1] = d;
    847     return (PyObject *)ob;
    848   bad_digit:
    849     Py_DECREF(ob);
    850     PyErr_SetString(PyExc_ValueError,
    851                     "bad marshal data (digit out of range in long)");
    852     return NULL;
    853 }
    854 
    855 /* allocate the reflist index for a new object. Return -1 on failure */
    856 static Py_ssize_t
    857 r_ref_reserve(int flag, RFILE *p)
    858 {
    859     if (flag) { /* currently only FLAG_REF is defined */
    860         Py_ssize_t idx = PyList_GET_SIZE(p->refs);
    861         if (idx >= 0x7ffffffe) {
    862             PyErr_SetString(PyExc_ValueError, "bad marshal data (index list too large)");
    863             return -1;
    864         }
    865         if (PyList_Append(p->refs, Py_None) < 0)
    866             return -1;
    867         return idx;
    868     } else
    869         return 0;
    870 }
    871 
    872 /* insert the new object 'o' to the reflist at previously
    873  * allocated index 'idx'.
    874  * 'o' can be NULL, in which case nothing is done.
    875  * if 'o' was non-NULL, and the function succeeds, 'o' is returned.
    876  * if 'o' was non-NULL, and the function fails, 'o' is released and
    877  * NULL returned. This simplifies error checking at the call site since
    878  * a single test for NULL for the function result is enough.
    879  */
    880 static PyObject *
    881 r_ref_insert(PyObject *o, Py_ssize_t idx, int flag, RFILE *p)
    882 {
    883     if (o != NULL && flag) { /* currently only FLAG_REF is defined */
    884         PyObject *tmp = PyList_GET_ITEM(p->refs, idx);
    885         Py_INCREF(o);
    886         PyList_SET_ITEM(p->refs, idx, o);
    887         Py_DECREF(tmp);
    888     }
    889     return o;
    890 }
    891 
    892 /* combination of both above, used when an object can be
    893  * created whenever it is seen in the file, as opposed to
    894  * after having loaded its sub-objects.
    895  */
    896 static PyObject *
    897 r_ref(PyObject *o, int flag, RFILE *p)
    898 {
    899     assert(flag & FLAG_REF);
    900     if (o == NULL)
    901         return NULL;
    902     if (PyList_Append(p->refs, o) < 0) {
    903         Py_DECREF(o); /* release the new object */
    904         return NULL;
    905     }
    906     return o;
    907 }
    908 
    909 static PyObject *
    910 r_object(RFILE *p)
    911 {
    912     /* NULL is a valid return value, it does not necessarily means that
    913        an exception is set. */
    914     PyObject *v, *v2;
    915     Py_ssize_t idx = 0;
    916     long i, n;
    917     int type, code = r_byte(p);
    918     int flag, is_interned = 0;
    919     PyObject *retval = NULL;
    920 
    921     if (code == EOF) {
    922         PyErr_SetString(PyExc_EOFError,
    923                         "EOF read where object expected");
    924         return NULL;
    925     }
    926 
    927     p->depth++;
    928 
    929     if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
    930         p->depth--;
    931         PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
    932         return NULL;
    933     }
    934 
    935     flag = code & FLAG_REF;
    936     type = code & ~FLAG_REF;
    937 
    938 #define R_REF(O) do{\
    939     if (flag) \
    940         O = r_ref(O, flag, p);\
    941 } while (0)
    942 
    943     switch (type) {
    944 
    945     case TYPE_NULL:
    946         break;
    947 
    948     case TYPE_NONE:
    949         Py_INCREF(Py_None);
    950         retval = Py_None;
    951         break;
    952 
    953     case TYPE_STOPITER:
    954         Py_INCREF(PyExc_StopIteration);
    955         retval = PyExc_StopIteration;
    956         break;
    957 
    958     case TYPE_ELLIPSIS:
    959         Py_INCREF(Py_Ellipsis);
    960         retval = Py_Ellipsis;
    961         break;
    962 
    963     case TYPE_FALSE:
    964         Py_INCREF(Py_False);
    965         retval = Py_False;
    966         break;
    967 
    968     case TYPE_TRUE:
    969         Py_INCREF(Py_True);
    970         retval = Py_True;
    971         break;
    972 
    973     case TYPE_INT:
    974         n = r_long(p);
    975         retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
    976         R_REF(retval);
    977         break;
    978 
    979     case TYPE_LONG:
    980         retval = r_PyLong(p);
    981         R_REF(retval);
    982         break;
    983 
    984     case TYPE_FLOAT:
    985         {
    986             char buf[256];
    987             const char *ptr;
    988             double dx;
    989             n = r_byte(p);
    990             if (n == EOF) {
    991                 PyErr_SetString(PyExc_EOFError,
    992                     "EOF read where object expected");
    993                 break;
    994             }
    995             ptr = r_string(n, p);
    996             if (ptr == NULL)
    997                 break;
    998             memcpy(buf, ptr, n);
    999             buf[n] = '\0';
   1000             dx = PyOS_string_to_double(buf, NULL, NULL);
   1001             if (dx == -1.0 && PyErr_Occurred())
   1002                 break;
   1003             retval = PyFloat_FromDouble(dx);
   1004             R_REF(retval);
   1005             break;
   1006         }
   1007 
   1008     case TYPE_BINARY_FLOAT:
   1009         {
   1010             const unsigned char *buf;
   1011             double x;
   1012             buf = (const unsigned char *) r_string(8, p);
   1013             if (buf == NULL)
   1014                 break;
   1015             x = _PyFloat_Unpack8(buf, 1);
   1016             if (x == -1.0 && PyErr_Occurred())
   1017                 break;
   1018             retval = PyFloat_FromDouble(x);
   1019             R_REF(retval);
   1020             break;
   1021         }
   1022 
   1023     case TYPE_COMPLEX:
   1024         {
   1025             char buf[256];
   1026             const char *ptr;
   1027             Py_complex c;
   1028             n = r_byte(p);
   1029             if (n == EOF) {
   1030                 PyErr_SetString(PyExc_EOFError,
   1031                     "EOF read where object expected");
   1032                 break;
   1033             }
   1034             ptr = r_string(n, p);
   1035             if (ptr == NULL)
   1036                 break;
   1037             memcpy(buf, ptr, n);
   1038             buf[n] = '\0';
   1039             c.real = PyOS_string_to_double(buf, NULL, NULL);
   1040             if (c.real == -1.0 && PyErr_Occurred())
   1041                 break;
   1042             n = r_byte(p);
   1043             if (n == EOF) {
   1044                 PyErr_SetString(PyExc_EOFError,
   1045                     "EOF read where object expected");
   1046                 break;
   1047             }
   1048             ptr = r_string(n, p);
   1049             if (ptr == NULL)
   1050                 break;
   1051             memcpy(buf, ptr, n);
   1052             buf[n] = '\0';
   1053             c.imag = PyOS_string_to_double(buf, NULL, NULL);
   1054             if (c.imag == -1.0 && PyErr_Occurred())
   1055                 break;
   1056             retval = PyComplex_FromCComplex(c);
   1057             R_REF(retval);
   1058             break;
   1059         }
   1060 
   1061     case TYPE_BINARY_COMPLEX:
   1062         {
   1063             const unsigned char *buf;
   1064             Py_complex c;
   1065             buf = (const unsigned char *) r_string(8, p);
   1066             if (buf == NULL)
   1067                 break;
   1068             c.real = _PyFloat_Unpack8(buf, 1);
   1069             if (c.real == -1.0 && PyErr_Occurred())
   1070                 break;
   1071             buf = (const unsigned char *) r_string(8, p);
   1072             if (buf == NULL)
   1073                 break;
   1074             c.imag = _PyFloat_Unpack8(buf, 1);
   1075             if (c.imag == -1.0 && PyErr_Occurred())
   1076                 break;
   1077             retval = PyComplex_FromCComplex(c);
   1078             R_REF(retval);
   1079             break;
   1080         }
   1081 
   1082     case TYPE_STRING:
   1083         {
   1084             const char *ptr;
   1085             n = r_long(p);
   1086             if (PyErr_Occurred())
   1087                 break;
   1088             if (n < 0 || n > SIZE32_MAX) {
   1089                 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
   1090                 break;
   1091             }
   1092             v = PyBytes_FromStringAndSize((char *)NULL, n);
   1093             if (v == NULL)
   1094                 break;
   1095             ptr = r_string(n, p);
   1096             if (ptr == NULL) {
   1097                 Py_DECREF(v);
   1098                 break;
   1099             }
   1100             memcpy(PyBytes_AS_STRING(v), ptr, n);
   1101             retval = v;
   1102             R_REF(retval);
   1103             break;
   1104         }
   1105 
   1106     case TYPE_ASCII_INTERNED:
   1107         is_interned = 1;
   1108     case TYPE_ASCII:
   1109         n = r_long(p);
   1110         if (PyErr_Occurred())
   1111             break;
   1112         if (n < 0 || n > SIZE32_MAX) {
   1113             PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
   1114             break;
   1115         }
   1116         goto _read_ascii;
   1117 
   1118     case TYPE_SHORT_ASCII_INTERNED:
   1119         is_interned = 1;
   1120     case TYPE_SHORT_ASCII:
   1121         n = r_byte(p);
   1122         if (n == EOF) {
   1123             PyErr_SetString(PyExc_EOFError,
   1124                 "EOF read where object expected");
   1125             break;
   1126         }
   1127     _read_ascii:
   1128         {
   1129             const char *ptr;
   1130             ptr = r_string(n, p);
   1131             if (ptr == NULL)
   1132                 break;
   1133             v = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, ptr, n);
   1134             if (v == NULL)
   1135                 break;
   1136             if (is_interned)
   1137                 PyUnicode_InternInPlace(&v);
   1138             retval = v;
   1139             R_REF(retval);
   1140             break;
   1141         }
   1142 
   1143     case TYPE_INTERNED:
   1144         is_interned = 1;
   1145     case TYPE_UNICODE:
   1146         {
   1147         const char *buffer;
   1148 
   1149         n = r_long(p);
   1150         if (PyErr_Occurred())
   1151             break;
   1152         if (n < 0 || n > SIZE32_MAX) {
   1153             PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
   1154             break;
   1155         }
   1156         if (n != 0) {
   1157             buffer = r_string(n, p);
   1158             if (buffer == NULL)
   1159                 break;
   1160             v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
   1161         }
   1162         else {
   1163             v = PyUnicode_New(0, 0);
   1164         }
   1165         if (v == NULL)
   1166             break;
   1167         if (is_interned)
   1168             PyUnicode_InternInPlace(&v);
   1169         retval = v;
   1170         R_REF(retval);
   1171         break;
   1172         }
   1173 
   1174     case TYPE_SMALL_TUPLE:
   1175         n = (unsigned char) r_byte(p);
   1176         if (PyErr_Occurred())
   1177             break;
   1178         goto _read_tuple;
   1179     case TYPE_TUPLE:
   1180         n = r_long(p);
   1181         if (PyErr_Occurred())
   1182             break;
   1183         if (n < 0 || n > SIZE32_MAX) {
   1184             PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
   1185             break;
   1186         }
   1187     _read_tuple:
   1188         v = PyTuple_New(n);
   1189         R_REF(v);
   1190         if (v == NULL)
   1191             break;
   1192 
   1193         for (i = 0; i < n; i++) {
   1194             v2 = r_object(p);
   1195             if ( v2 == NULL ) {
   1196                 if (!PyErr_Occurred())
   1197                     PyErr_SetString(PyExc_TypeError,
   1198                         "NULL object in marshal data for tuple");
   1199                 Py_DECREF(v);
   1200                 v = NULL;
   1201                 break;
   1202             }
   1203             PyTuple_SET_ITEM(v, i, v2);
   1204         }
   1205         retval = v;
   1206         break;
   1207 
   1208     case TYPE_LIST:
   1209         n = r_long(p);
   1210         if (PyErr_Occurred())
   1211             break;
   1212         if (n < 0 || n > SIZE32_MAX) {
   1213             PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
   1214             break;
   1215         }
   1216         v = PyList_New(n);
   1217         R_REF(v);
   1218         if (v == NULL)
   1219             break;
   1220         for (i = 0; i < n; i++) {
   1221             v2 = r_object(p);
   1222             if ( v2 == NULL ) {
   1223                 if (!PyErr_Occurred())
   1224                     PyErr_SetString(PyExc_TypeError,
   1225                         "NULL object in marshal data for list");
   1226                 Py_DECREF(v);
   1227                 v = NULL;
   1228                 break;
   1229             }
   1230             PyList_SET_ITEM(v, i, v2);
   1231         }
   1232         retval = v;
   1233         break;
   1234 
   1235     case TYPE_DICT:
   1236         v = PyDict_New();
   1237         R_REF(v);
   1238         if (v == NULL)
   1239             break;
   1240         for (;;) {
   1241             PyObject *key, *val;
   1242             key = r_object(p);
   1243             if (key == NULL)
   1244                 break;
   1245             val = r_object(p);
   1246             if (val == NULL) {
   1247                 Py_DECREF(key);
   1248                 break;
   1249             }
   1250             if (PyDict_SetItem(v, key, val) < 0) {
   1251                 Py_DECREF(key);
   1252                 Py_DECREF(val);
   1253                 break;
   1254             }
   1255             Py_DECREF(key);
   1256             Py_DECREF(val);
   1257         }
   1258         if (PyErr_Occurred()) {
   1259             Py_DECREF(v);
   1260             v = NULL;
   1261         }
   1262         retval = v;
   1263         break;
   1264 
   1265     case TYPE_SET:
   1266     case TYPE_FROZENSET:
   1267         n = r_long(p);
   1268         if (PyErr_Occurred())
   1269             break;
   1270         if (n < 0 || n > SIZE32_MAX) {
   1271             PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
   1272             break;
   1273         }
   1274 
   1275         if (n == 0 && type == TYPE_FROZENSET) {
   1276             /* call frozenset() to get the empty frozenset singleton */
   1277             v = PyObject_CallFunction((PyObject*)&PyFrozenSet_Type, NULL);
   1278             if (v == NULL)
   1279                 break;
   1280             R_REF(v);
   1281             retval = v;
   1282         }
   1283         else {
   1284             v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
   1285             if (type == TYPE_SET) {
   1286                 R_REF(v);
   1287             } else {
   1288                 /* must use delayed registration of frozensets because they must
   1289                  * be init with a refcount of 1
   1290                  */
   1291                 idx = r_ref_reserve(flag, p);
   1292                 if (idx < 0)
   1293                     Py_CLEAR(v); /* signal error */
   1294             }
   1295             if (v == NULL)
   1296                 break;
   1297 
   1298             for (i = 0; i < n; i++) {
   1299                 v2 = r_object(p);
   1300                 if ( v2 == NULL ) {
   1301                     if (!PyErr_Occurred())
   1302                         PyErr_SetString(PyExc_TypeError,
   1303                             "NULL object in marshal data for set");
   1304                     Py_DECREF(v);
   1305                     v = NULL;
   1306                     break;
   1307                 }
   1308                 if (PySet_Add(v, v2) == -1) {
   1309                     Py_DECREF(v);
   1310                     Py_DECREF(v2);
   1311                     v = NULL;
   1312                     break;
   1313                 }
   1314                 Py_DECREF(v2);
   1315             }
   1316             if (type != TYPE_SET)
   1317                 v = r_ref_insert(v, idx, flag, p);
   1318             retval = v;
   1319         }
   1320         break;
   1321 
   1322     case TYPE_CODE:
   1323         {
   1324             int argcount;
   1325             int kwonlyargcount;
   1326             int nlocals;
   1327             int stacksize;
   1328             int flags;
   1329             PyObject *code = NULL;
   1330             PyObject *consts = NULL;
   1331             PyObject *names = NULL;
   1332             PyObject *varnames = NULL;
   1333             PyObject *freevars = NULL;
   1334             PyObject *cellvars = NULL;
   1335             PyObject *filename = NULL;
   1336             PyObject *name = NULL;
   1337             int firstlineno;
   1338             PyObject *lnotab = NULL;
   1339 
   1340             idx = r_ref_reserve(flag, p);
   1341             if (idx < 0)
   1342                 break;
   1343 
   1344             v = NULL;
   1345 
   1346             /* XXX ignore long->int overflows for now */
   1347             argcount = (int)r_long(p);
   1348             if (PyErr_Occurred())
   1349                 goto code_error;
   1350             kwonlyargcount = (int)r_long(p);
   1351             if (PyErr_Occurred())
   1352                 goto code_error;
   1353             nlocals = (int)r_long(p);
   1354             if (PyErr_Occurred())
   1355                 goto code_error;
   1356             stacksize = (int)r_long(p);
   1357             if (PyErr_Occurred())
   1358                 goto code_error;
   1359             flags = (int)r_long(p);
   1360             if (PyErr_Occurred())
   1361                 goto code_error;
   1362             code = r_object(p);
   1363             if (code == NULL)
   1364                 goto code_error;
   1365             consts = r_object(p);
   1366             if (consts == NULL)
   1367                 goto code_error;
   1368             names = r_object(p);
   1369             if (names == NULL)
   1370                 goto code_error;
   1371             varnames = r_object(p);
   1372             if (varnames == NULL)
   1373                 goto code_error;
   1374             freevars = r_object(p);
   1375             if (freevars == NULL)
   1376                 goto code_error;
   1377             cellvars = r_object(p);
   1378             if (cellvars == NULL)
   1379                 goto code_error;
   1380             filename = r_object(p);
   1381             if (filename == NULL)
   1382                 goto code_error;
   1383             if (PyUnicode_CheckExact(filename)) {
   1384                 if (p->current_filename != NULL) {
   1385                     if (!PyUnicode_Compare(filename, p->current_filename)) {
   1386                         Py_DECREF(filename);
   1387                         Py_INCREF(p->current_filename);
   1388                         filename = p->current_filename;
   1389                     }
   1390                 }
   1391                 else {
   1392                     p->current_filename = filename;
   1393                 }
   1394             }
   1395             name = r_object(p);
   1396             if (name == NULL)
   1397                 goto code_error;
   1398             firstlineno = (int)r_long(p);
   1399             if (firstlineno == -1 && PyErr_Occurred())
   1400                 break;
   1401             lnotab = r_object(p);
   1402             if (lnotab == NULL)
   1403                 goto code_error;
   1404 
   1405             v = (PyObject *) PyCode_New(
   1406                             argcount, kwonlyargcount,
   1407                             nlocals, stacksize, flags,
   1408                             code, consts, names, varnames,
   1409                             freevars, cellvars, filename, name,
   1410                             firstlineno, lnotab);
   1411             v = r_ref_insert(v, idx, flag, p);
   1412 
   1413           code_error:
   1414             Py_XDECREF(code);
   1415             Py_XDECREF(consts);
   1416             Py_XDECREF(names);
   1417             Py_XDECREF(varnames);
   1418             Py_XDECREF(freevars);
   1419             Py_XDECREF(cellvars);
   1420             Py_XDECREF(filename);
   1421             Py_XDECREF(name);
   1422             Py_XDECREF(lnotab);
   1423         }
   1424         retval = v;
   1425         break;
   1426 
   1427     case TYPE_REF:
   1428         n = r_long(p);
   1429         if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
   1430             if (n == -1 && PyErr_Occurred())
   1431                 break;
   1432             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
   1433             break;
   1434         }
   1435         v = PyList_GET_ITEM(p->refs, n);
   1436         if (v == Py_None) {
   1437             PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
   1438             break;
   1439         }
   1440         Py_INCREF(v);
   1441         retval = v;
   1442         break;
   1443 
   1444     default:
   1445         /* Bogus data got written, which isn't ideal.
   1446            This will let you keep working and recover. */
   1447         PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
   1448         break;
   1449 
   1450     }
   1451     p->depth--;
   1452     return retval;
   1453 }
   1454 
   1455 static PyObject *
   1456 read_object(RFILE *p)
   1457 {
   1458     PyObject *v;
   1459     if (PyErr_Occurred()) {
   1460         fprintf(stderr, "XXX readobject called with exception set\n");
   1461         return NULL;
   1462     }
   1463     v = r_object(p);
   1464     if (v == NULL && !PyErr_Occurred())
   1465         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
   1466     return v;
   1467 }
   1468 
   1469 int
   1470 PyMarshal_ReadShortFromFile(FILE *fp)
   1471 {
   1472     RFILE rf;
   1473     int res;
   1474     assert(fp);
   1475     rf.readable = NULL;
   1476     rf.fp = fp;
   1477     rf.current_filename = NULL;
   1478     rf.end = rf.ptr = NULL;
   1479     rf.buf = NULL;
   1480     res = r_short(&rf);
   1481     if (rf.buf != NULL)
   1482         PyMem_FREE(rf.buf);
   1483     return res;
   1484 }
   1485 
   1486 long
   1487 PyMarshal_ReadLongFromFile(FILE *fp)
   1488 {
   1489     RFILE rf;
   1490     long res;
   1491     rf.fp = fp;
   1492     rf.readable = NULL;
   1493     rf.current_filename = NULL;
   1494     rf.ptr = rf.end = NULL;
   1495     rf.buf = NULL;
   1496     res = r_long(&rf);
   1497     if (rf.buf != NULL)
   1498         PyMem_FREE(rf.buf);
   1499     return res;
   1500 }
   1501 
   1502 /* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
   1503 static off_t
   1504 getfilesize(FILE *fp)
   1505 {
   1506     struct _Py_stat_struct st;
   1507     if (_Py_fstat_noraise(fileno(fp), &st) != 0)
   1508         return -1;
   1509 #if SIZEOF_OFF_T == 4
   1510     else if (st.st_size >= INT_MAX)
   1511         return (off_t)INT_MAX;
   1512 #endif
   1513     else
   1514         return (off_t)st.st_size;
   1515 }
   1516 
   1517 /* If we can get the size of the file up-front, and it's reasonably small,
   1518  * read it in one gulp and delegate to ...FromString() instead.  Much quicker
   1519  * than reading a byte at a time from file; speeds .pyc imports.
   1520  * CAUTION:  since this may read the entire remainder of the file, don't
   1521  * call it unless you know you're done with the file.
   1522  */
   1523 PyObject *
   1524 PyMarshal_ReadLastObjectFromFile(FILE *fp)
   1525 {
   1526 /* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
   1527 #define REASONABLE_FILE_LIMIT (1L << 18)
   1528     off_t filesize;
   1529     filesize = getfilesize(fp);
   1530     if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
   1531         char* pBuf = (char *)PyMem_MALLOC(filesize);
   1532         if (pBuf != NULL) {
   1533             size_t n = fread(pBuf, 1, (size_t)filesize, fp);
   1534             PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
   1535             PyMem_FREE(pBuf);
   1536             return v;
   1537         }
   1538 
   1539     }
   1540     /* We don't have fstat, or we do but the file is larger than
   1541      * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
   1542      */
   1543     return PyMarshal_ReadObjectFromFile(fp);
   1544 
   1545 #undef REASONABLE_FILE_LIMIT
   1546 }
   1547 
   1548 PyObject *
   1549 PyMarshal_ReadObjectFromFile(FILE *fp)
   1550 {
   1551     RFILE rf;
   1552     PyObject *result;
   1553     rf.fp = fp;
   1554     rf.readable = NULL;
   1555     rf.current_filename = NULL;
   1556     rf.depth = 0;
   1557     rf.ptr = rf.end = NULL;
   1558     rf.buf = NULL;
   1559     rf.refs = PyList_New(0);
   1560     if (rf.refs == NULL)
   1561         return NULL;
   1562     result = r_object(&rf);
   1563     Py_DECREF(rf.refs);
   1564     if (rf.buf != NULL)
   1565         PyMem_FREE(rf.buf);
   1566     return result;
   1567 }
   1568 
   1569 PyObject *
   1570 PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
   1571 {
   1572     RFILE rf;
   1573     PyObject *result;
   1574     rf.fp = NULL;
   1575     rf.readable = NULL;
   1576     rf.current_filename = NULL;
   1577     rf.ptr = (char *)str;
   1578     rf.end = (char *)str + len;
   1579     rf.buf = NULL;
   1580     rf.depth = 0;
   1581     rf.refs = PyList_New(0);
   1582     if (rf.refs == NULL)
   1583         return NULL;
   1584     result = r_object(&rf);
   1585     Py_DECREF(rf.refs);
   1586     if (rf.buf != NULL)
   1587         PyMem_FREE(rf.buf);
   1588     return result;
   1589 }
   1590 
   1591 PyObject *
   1592 PyMarshal_WriteObjectToString(PyObject *x, int version)
   1593 {
   1594     WFILE wf;
   1595 
   1596     memset(&wf, 0, sizeof(wf));
   1597     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
   1598     if (wf.str == NULL)
   1599         return NULL;
   1600     wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
   1601     wf.end = wf.ptr + PyBytes_Size(wf.str);
   1602     wf.error = WFERR_OK;
   1603     wf.version = version;
   1604     if (w_init_refs(&wf, version)) {
   1605         Py_DECREF(wf.str);
   1606         return NULL;
   1607     }
   1608     w_object(x, &wf);
   1609     w_clear_refs(&wf);
   1610     if (wf.str != NULL) {
   1611         char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
   1612         if (wf.ptr - base > PY_SSIZE_T_MAX) {
   1613             Py_DECREF(wf.str);
   1614             PyErr_SetString(PyExc_OverflowError,
   1615                             "too much marshal data for a string");
   1616             return NULL;
   1617         }
   1618         if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
   1619             return NULL;
   1620     }
   1621     if (wf.error != WFERR_OK) {
   1622         Py_XDECREF(wf.str);
   1623         if (wf.error == WFERR_NOMEMORY)
   1624             PyErr_NoMemory();
   1625         else
   1626             PyErr_SetString(PyExc_ValueError,
   1627               (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
   1628                :"object too deeply nested to marshal");
   1629         return NULL;
   1630     }
   1631     return wf.str;
   1632 }
   1633 
   1634 /* And an interface for Python programs... */
   1635 
   1636 static PyObject *
   1637 marshal_dump(PyObject *self, PyObject *args)
   1638 {
   1639     /* XXX Quick hack -- need to do this differently */
   1640     PyObject *x;
   1641     PyObject *f;
   1642     int version = Py_MARSHAL_VERSION;
   1643     PyObject *s;
   1644     PyObject *res;
   1645     _Py_IDENTIFIER(write);
   1646 
   1647     if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
   1648         return NULL;
   1649     s = PyMarshal_WriteObjectToString(x, version);
   1650     if (s == NULL)
   1651         return NULL;
   1652     res = _PyObject_CallMethodId(f, &PyId_write, "O", s);
   1653     Py_DECREF(s);
   1654     return res;
   1655 }
   1656 
   1657 PyDoc_STRVAR(dump_doc,
   1658 "dump(value, file[, version])\n\
   1659 \n\
   1660 Write the value on the open file. The value must be a supported type.\n\
   1661 The file must be an open file object such as sys.stdout or returned by\n\
   1662 open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
   1663 \n\
   1664 If the value has (or contains an object that has) an unsupported type, a\n\
   1665 ValueError exception is raised - but garbage data will also be written\n\
   1666 to the file. The object will not be properly read back by load()\n\
   1667 \n\
   1668 The version argument indicates the data format that dump should use.");
   1669 
   1670 static PyObject *
   1671 marshal_load(PyObject *self, PyObject *f)
   1672 {
   1673     PyObject *data, *result;
   1674     _Py_IDENTIFIER(read);
   1675     RFILE rf;
   1676 
   1677     /*
   1678      * Make a call to the read method, but read zero bytes.
   1679      * This is to ensure that the object passed in at least
   1680      * has a read method which returns bytes.
   1681      * This can be removed if we guarantee good error handling
   1682      * for r_string()
   1683      */
   1684     data = _PyObject_CallMethodId(f, &PyId_read, "i", 0);
   1685     if (data == NULL)
   1686         return NULL;
   1687     if (!PyBytes_Check(data)) {
   1688         PyErr_Format(PyExc_TypeError,
   1689                      "f.read() returned not bytes but %.100s",
   1690                      data->ob_type->tp_name);
   1691         result = NULL;
   1692     }
   1693     else {
   1694         rf.depth = 0;
   1695         rf.fp = NULL;
   1696         rf.readable = f;
   1697         rf.current_filename = NULL;
   1698         rf.ptr = rf.end = NULL;
   1699         rf.buf = NULL;
   1700         if ((rf.refs = PyList_New(0)) != NULL) {
   1701             result = read_object(&rf);
   1702             Py_DECREF(rf.refs);
   1703             if (rf.buf != NULL)
   1704                 PyMem_FREE(rf.buf);
   1705         } else
   1706             result = NULL;
   1707     }
   1708     Py_DECREF(data);
   1709     return result;
   1710 }
   1711 
   1712 PyDoc_STRVAR(load_doc,
   1713 "load(file)\n\
   1714 \n\
   1715 Read one value from the open file and return it. If no valid value is\n\
   1716 read (e.g. because the data has a different Python version's\n\
   1717 incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
   1718 The file must be an open file object opened in binary mode ('rb' or\n\
   1719 'r+b').\n\
   1720 \n\
   1721 Note: If an object containing an unsupported type was marshalled with\n\
   1722 dump(), load() will substitute None for the unmarshallable type.");
   1723 
   1724 
   1725 static PyObject *
   1726 marshal_dumps(PyObject *self, PyObject *args)
   1727 {
   1728     PyObject *x;
   1729     int version = Py_MARSHAL_VERSION;
   1730     if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
   1731         return NULL;
   1732     return PyMarshal_WriteObjectToString(x, version);
   1733 }
   1734 
   1735 PyDoc_STRVAR(dumps_doc,
   1736 "dumps(value[, version])\n\
   1737 \n\
   1738 Return the string that would be written to a file by dump(value, file).\n\
   1739 The value must be a supported type. Raise a ValueError exception if\n\
   1740 value has (or contains an object that has) an unsupported type.\n\
   1741 \n\
   1742 The version argument indicates the data format that dumps should use.");
   1743 
   1744 
   1745 static PyObject *
   1746 marshal_loads(PyObject *self, PyObject *args)
   1747 {
   1748     RFILE rf;
   1749     Py_buffer p;
   1750     char *s;
   1751     Py_ssize_t n;
   1752     PyObject* result;
   1753     if (!PyArg_ParseTuple(args, "y*:loads", &p))
   1754         return NULL;
   1755     s = p.buf;
   1756     n = p.len;
   1757     rf.fp = NULL;
   1758     rf.readable = NULL;
   1759     rf.current_filename = NULL;
   1760     rf.ptr = s;
   1761     rf.end = s + n;
   1762     rf.depth = 0;
   1763     if ((rf.refs = PyList_New(0)) == NULL)
   1764         return NULL;
   1765     result = read_object(&rf);
   1766     PyBuffer_Release(&p);
   1767     Py_DECREF(rf.refs);
   1768     return result;
   1769 }
   1770 
   1771 PyDoc_STRVAR(loads_doc,
   1772 "loads(bytes)\n\
   1773 \n\
   1774 Convert the bytes object to a value. If no valid value is found, raise\n\
   1775 EOFError, ValueError or TypeError. Extra characters in the input are\n\
   1776 ignored.");
   1777 
   1778 static PyMethodDef marshal_methods[] = {
   1779     {"dump",            marshal_dump,   METH_VARARGS,   dump_doc},
   1780     {"load",            marshal_load,   METH_O,         load_doc},
   1781     {"dumps",           marshal_dumps,  METH_VARARGS,   dumps_doc},
   1782     {"loads",           marshal_loads,  METH_VARARGS,   loads_doc},
   1783     {NULL,              NULL}           /* sentinel */
   1784 };
   1785 
   1786 
   1787 PyDoc_STRVAR(module_doc,
   1788 "This module contains functions that can read and write Python values in\n\
   1789 a binary format. The format is specific to Python, but independent of\n\
   1790 machine architecture issues.\n\
   1791 \n\
   1792 Not all Python object types are supported; in general, only objects\n\
   1793 whose value is independent from a particular invocation of Python can be\n\
   1794 written and read by this module. The following types are supported:\n\
   1795 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
   1796 tuples, lists, sets, dictionaries, and code objects, where it\n\
   1797 should be understood that tuples, lists and dictionaries are only\n\
   1798 supported as long as the values contained therein are themselves\n\
   1799 supported; and recursive lists and dictionaries should not be written\n\
   1800 (they will cause infinite loops).\n\
   1801 \n\
   1802 Variables:\n\
   1803 \n\
   1804 version -- indicates the format that the module uses. Version 0 is the\n\
   1805     historical format, version 1 shares interned strings and version 2\n\
   1806     uses a binary format for floating point numbers.\n\
   1807     Version 3 shares common object references (New in version 3.4).\n\
   1808 \n\
   1809 Functions:\n\
   1810 \n\
   1811 dump() -- write value to a file\n\
   1812 load() -- read value from a file\n\
   1813 dumps() -- write value to a string\n\
   1814 loads() -- read value from a string");
   1815 
   1816 
   1817 
   1818 static struct PyModuleDef marshalmodule = {
   1819     PyModuleDef_HEAD_INIT,
   1820     "marshal",
   1821     module_doc,
   1822     0,
   1823     marshal_methods,
   1824     NULL,
   1825     NULL,
   1826     NULL,
   1827     NULL
   1828 };
   1829 
   1830 PyMODINIT_FUNC
   1831 PyMarshal_Init(void)
   1832 {
   1833     PyObject *mod = PyModule_Create(&marshalmodule);
   1834     if (mod == NULL)
   1835         return NULL;
   1836     PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
   1837     return mod;
   1838 }
   1839