Home | History | Annotate | Download | only in Modules
#include "Python.h"
#include "structmember.h"
#include "osdefs.h"
#include "marshal.h"
#include <errno.h>
#include <time.h>
      6 
      7 
/* Bit flags combined into st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One row of the module search table: a file name suffix to append to a
   module path, and the IS_* flags describing an archive entry that
   matches it. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix used, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* bitwise OR of IS_* flags */
};
     16 
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is terminated by an entry with an empty suffix; loops over
   it stop when they reach that entry. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel */
};
     31 
/* zipimporter object definition and support */

typedef struct _zipimporter ZipImporter;

/* Instance layout of a zipimporter. The three object fields are filled
   in by zipimporter_init and released by zipimporter_dealloc. */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
     42 
/* Exception object handed to PyErr_SetString/PyErr_Format for the
   zip-specific failures reported in this file. */
static PyObject *ZipImportError;
/* Dict used by zipimporter_init as a cache: archive path -> files dict
   (the value produced by read_directory). */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True if op is an instance of ZipImporter_Type (or a subtype). */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
     54 
     55 
     56 /* zipimporter.__init__
     57    Split the "subdirectory" from the Zip archive path, lookup a matching
     58    entry in sys.path_importer_cache, fetch the file directory from there
     59    if found, or else read it from the archive. */
     60 static int
     61 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
     62 {
     63     char *path, *p, *prefix, buf[MAXPATHLEN+2];
     64     size_t len;
     65 
     66     if (!_PyArg_NoKeywords("zipimporter()", kwds))
     67         return -1;
     68 
     69     if (!PyArg_ParseTuple(args, "s:zipimporter",
     70                           &path))
     71         return -1;
     72 
     73     len = strlen(path);
     74     if (len == 0) {
     75         PyErr_SetString(ZipImportError, "archive path is empty");
     76         return -1;
     77     }
     78     if (len >= MAXPATHLEN) {
     79         PyErr_SetString(ZipImportError,
     80                         "archive path too long");
     81         return -1;
     82     }
     83     strcpy(buf, path);
     84 
     85 #ifdef ALTSEP
     86     for (p = buf; *p; p++) {
     87         if (*p == ALTSEP)
     88             *p = SEP;
     89     }
     90 #endif
     91 
     92     path = NULL;
     93     prefix = NULL;
     94     for (;;) {
     95 #ifndef RISCOS
     96         struct stat statbuf;
     97         int rv;
     98 
     99         rv = stat(buf, &statbuf);
    100         if (rv == 0) {
    101             /* it exists */
    102             if (S_ISREG(statbuf.st_mode))
    103                 /* it's a file */
    104                 path = buf;
    105             break;
    106         }
    107 #else
    108         if (object_exists(buf)) {
    109             /* it exists */
    110             if (isfile(buf))
    111                 /* it's a file */
    112                 path = buf;
    113             break;
    114         }
    115 #endif
    116         /* back up one path element */
    117         p = strrchr(buf, SEP);
    118         if (prefix != NULL)
    119             *prefix = SEP;
    120         if (p == NULL)
    121             break;
    122         *p = '\0';
    123         prefix = p;
    124     }
    125     if (path != NULL) {
    126         PyObject *files;
    127         files = PyDict_GetItemString(zip_directory_cache, path);
    128         if (files == NULL) {
    129             files = read_directory(buf);
    130             if (files == NULL)
    131                 return -1;
    132             if (PyDict_SetItemString(zip_directory_cache, path,
    133                                      files) != 0)
    134                 return -1;
    135         }
    136         else
    137             Py_INCREF(files);
    138         self->files = files;
    139     }
    140     else {
    141         PyErr_SetString(ZipImportError, "not a Zip file");
    142         return -1;
    143     }
    144 
    145     if (prefix == NULL)
    146         prefix = "";
    147     else {
    148         prefix++;
    149         len = strlen(prefix);
    150         if (prefix[len-1] != SEP) {
    151             /* add trailing SEP */
    152             prefix[len] = SEP;
    153             prefix[len + 1] = '\0';
    154         }
    155     }
    156 
    157     self->archive = PyString_FromString(buf);
    158     if (self->archive == NULL)
    159         return -1;
    160 
    161     self->prefix = PyString_FromString(prefix);
    162     if (self->prefix == NULL)
    163         return -1;
    164 
    165     return 0;
    166 }
    167 
    168 /* GC support. */
    169 static int
    170 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
    171 {
    172     ZipImporter *self = (ZipImporter *)obj;
    173     Py_VISIT(self->files);
    174     return 0;
    175 }
    176 
    177 static void
    178 zipimporter_dealloc(ZipImporter *self)
    179 {
    180     PyObject_GC_UnTrack(self);
    181     Py_XDECREF(self->archive);
    182     Py_XDECREF(self->prefix);
    183     Py_XDECREF(self->files);
    184     Py_TYPE(self)->tp_free((PyObject *)self);
    185 }
    186 
    187 static PyObject *
    188 zipimporter_repr(ZipImporter *self)
    189 {
    190     char buf[500];
    191     char *archive = "???";
    192     char *prefix = "";
    193 
    194     if (self->archive != NULL && PyString_Check(self->archive))
    195         archive = PyString_AsString(self->archive);
    196     if (self->prefix != NULL && PyString_Check(self->prefix))
    197         prefix = PyString_AsString(self->prefix);
    198     if (prefix != NULL && *prefix)
    199         PyOS_snprintf(buf, sizeof(buf),
    200                       "<zipimporter object \"%.300s%c%.150s\">",
    201                       archive, SEP, prefix);
    202     else
    203         PyOS_snprintf(buf, sizeof(buf),
    204                       "<zipimporter object \"%.300s\">",
    205                       archive);
    206     return PyString_FromString(buf);
    207 }
    208 
    209 /* return fullname.split(".")[-1] */
    210 static char *
    211 get_subname(char *fullname)
    212 {
    213     char *subname = strrchr(fullname, '.');
    214     if (subname == NULL)
    215         subname = fullname;
    216     else
    217         subname++;
    218     return subname;
    219 }
    220 
    221 /* Given a (sub)modulename, write the potential file path in the
    222    archive (without extension) to the path buffer. Return the
    223    length of the resulting string. */
    224 static int
    225 make_filename(char *prefix, char *name, char *path)
    226 {
    227     size_t len;
    228     char *p;
    229 
    230     len = strlen(prefix);
    231 
    232     /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
    233     if (len + strlen(name) + 13 >= MAXPATHLEN) {
    234         PyErr_SetString(ZipImportError, "path too long");
    235         return -1;
    236     }
    237 
    238     strcpy(path, prefix);
    239     strcpy(path + len, name);
    240     for (p = path + len; *p; p++) {
    241         if (*p == '.')
    242             *p = SEP;
    243     }
    244     len += strlen(name);
    245     assert(len < INT_MAX);
    246     return (int)len;
    247 }
    248 
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* the lookup path could not be built; exception set */
    MI_NOT_FOUND,   /* no entry of the search table matched */
    MI_MODULE,      /* matched a non-package entry */
    MI_PACKAGE      /* matched an entry flagged IS_PACKAGE */
};
    255 
    256 /* Return some information about a module. */
    257 static enum zi_module_info
    258 get_module_info(ZipImporter *self, char *fullname)
    259 {
    260     char *subname, path[MAXPATHLEN + 1];
    261     int len;
    262     struct st_zip_searchorder *zso;
    263 
    264     subname = get_subname(fullname);
    265 
    266     len = make_filename(PyString_AsString(self->prefix), subname, path);
    267     if (len < 0)
    268         return MI_ERROR;
    269 
    270     for (zso = zip_searchorder; *zso->suffix; zso++) {
    271         strcpy(path + len, zso->suffix);
    272         if (PyDict_GetItemString(self->files, path) != NULL) {
    273             if (zso->type & IS_PACKAGE)
    274                 return MI_PACKAGE;
    275             else
    276                 return MI_MODULE;
    277         }
    278     }
    279     return MI_NOT_FOUND;
    280 }
    281 
    282 /* Check whether we can satisfy the import of the module named by
    283    'fullname'. Return self if we can, None if we can't. */
    284 static PyObject *
    285 zipimporter_find_module(PyObject *obj, PyObject *args)
    286 {
    287     ZipImporter *self = (ZipImporter *)obj;
    288     PyObject *path = NULL;
    289     char *fullname;
    290     enum zi_module_info mi;
    291 
    292     if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
    293                           &fullname, &path))
    294         return NULL;
    295 
    296     mi = get_module_info(self, fullname);
    297     if (mi == MI_ERROR)
    298         return NULL;
    299     if (mi == MI_NOT_FOUND) {
    300         Py_INCREF(Py_None);
    301         return Py_None;
    302     }
    303     Py_INCREF(self);
    304     return (PyObject *)self;
    305 }
    306 
    307 /* Load and return the module named by 'fullname'. */
    308 static PyObject *
    309 zipimporter_load_module(PyObject *obj, PyObject *args)
    310 {
    311     ZipImporter *self = (ZipImporter *)obj;
    312     PyObject *code, *mod, *dict;
    313     char *fullname, *modpath;
    314     int ispackage;
    315 
    316     if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
    317                           &fullname))
    318         return NULL;
    319 
    320     code = get_module_code(self, fullname, &ispackage, &modpath);
    321     if (code == NULL)
    322         return NULL;
    323 
    324     mod = PyImport_AddModule(fullname);
    325     if (mod == NULL) {
    326         Py_DECREF(code);
    327         return NULL;
    328     }
    329     dict = PyModule_GetDict(mod);
    330 
    331     /* mod.__loader__ = self */
    332     if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
    333         goto error;
    334 
    335     if (ispackage) {
    336         /* add __path__ to the module *before* the code gets
    337            executed */
    338         PyObject *pkgpath, *fullpath;
    339         char *prefix = PyString_AsString(self->prefix);
    340         char *subname = get_subname(fullname);
    341         int err;
    342 
    343         fullpath = PyString_FromFormat("%s%c%s%s",
    344                                 PyString_AsString(self->archive),
    345                                 SEP,
    346                                 *prefix ? prefix : "",
    347                                 subname);
    348         if (fullpath == NULL)
    349             goto error;
    350 
    351         pkgpath = Py_BuildValue("[O]", fullpath);
    352         Py_DECREF(fullpath);
    353         if (pkgpath == NULL)
    354             goto error;
    355         err = PyDict_SetItemString(dict, "__path__", pkgpath);
    356         Py_DECREF(pkgpath);
    357         if (err != 0)
    358             goto error;
    359     }
    360     mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
    361     Py_DECREF(code);
    362     if (Py_VerboseFlag)
    363         PySys_WriteStderr("import %s # loaded from Zip %s\n",
    364                           fullname, modpath);
    365     return mod;
    366 error:
    367     Py_DECREF(code);
    368     Py_DECREF(mod);
    369     return NULL;
    370 }
    371 
    372 /* Return a string matching __file__ for the named module */
    373 static PyObject *
    374 zipimporter_get_filename(PyObject *obj, PyObject *args)
    375 {
    376     ZipImporter *self = (ZipImporter *)obj;
    377     PyObject *code;
    378     char *fullname, *modpath;
    379     int ispackage;
    380 
    381     if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
    382                          &fullname))
    383     return NULL;
    384 
    385     /* Deciding the filename requires working out where the code
    386        would come from if the module was actually loaded */
    387     code = get_module_code(self, fullname, &ispackage, &modpath);
    388     if (code == NULL)
    389     return NULL;
    390     Py_DECREF(code); /* Only need the path info */
    391 
    392     return PyString_FromString(modpath);
    393 }
    394 
    395 /* Return a bool signifying whether the module is a package or not. */
    396 static PyObject *
    397 zipimporter_is_package(PyObject *obj, PyObject *args)
    398 {
    399     ZipImporter *self = (ZipImporter *)obj;
    400     char *fullname;
    401     enum zi_module_info mi;
    402 
    403     if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
    404                           &fullname))
    405         return NULL;
    406 
    407     mi = get_module_info(self, fullname);
    408     if (mi == MI_ERROR)
    409         return NULL;
    410     if (mi == MI_NOT_FOUND) {
    411         PyErr_Format(ZipImportError, "can't find module '%.200s'",
    412                      fullname);
    413         return NULL;
    414     }
    415     return PyBool_FromLong(mi == MI_PACKAGE);
    416 }
    417 
    418 static PyObject *
    419 zipimporter_get_data(PyObject *obj, PyObject *args)
    420 {
    421     ZipImporter *self = (ZipImporter *)obj;
    422     char *path;
    423 #ifdef ALTSEP
    424     char *p, buf[MAXPATHLEN + 1];
    425 #endif
    426     PyObject *toc_entry;
    427     Py_ssize_t len;
    428 
    429     if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
    430         return NULL;
    431 
    432 #ifdef ALTSEP
    433     if (strlen(path) >= MAXPATHLEN) {
    434         PyErr_SetString(ZipImportError, "path too long");
    435         return NULL;
    436     }
    437     strcpy(buf, path);
    438     for (p = buf; *p; p++) {
    439         if (*p == ALTSEP)
    440             *p = SEP;
    441     }
    442     path = buf;
    443 #endif
    444     len = PyString_Size(self->archive);
    445     if ((size_t)len < strlen(path) &&
    446         strncmp(path, PyString_AsString(self->archive), len) == 0 &&
    447         path[len] == SEP) {
    448         path = path + len + 1;
    449     }
    450 
    451     toc_entry = PyDict_GetItemString(self->files, path);
    452     if (toc_entry == NULL) {
    453         PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
    454         return NULL;
    455     }
    456     return get_data(PyString_AsString(self->archive), toc_entry);
    457 }
    458 
    459 static PyObject *
    460 zipimporter_get_code(PyObject *obj, PyObject *args)
    461 {
    462     ZipImporter *self = (ZipImporter *)obj;
    463     char *fullname;
    464 
    465     if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
    466         return NULL;
    467 
    468     return get_module_code(self, fullname, NULL, NULL);
    469 }
    470 
    471 static PyObject *
    472 zipimporter_get_source(PyObject *obj, PyObject *args)
    473 {
    474     ZipImporter *self = (ZipImporter *)obj;
    475     PyObject *toc_entry;
    476     char *fullname, *subname, path[MAXPATHLEN+1];
    477     int len;
    478     enum zi_module_info mi;
    479 
    480     if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
    481         return NULL;
    482 
    483     mi = get_module_info(self, fullname);
    484     if (mi == MI_ERROR)
    485         return NULL;
    486     if (mi == MI_NOT_FOUND) {
    487         PyErr_Format(ZipImportError, "can't find module '%.200s'",
    488                      fullname);
    489         return NULL;
    490     }
    491     subname = get_subname(fullname);
    492 
    493     len = make_filename(PyString_AsString(self->prefix), subname, path);
    494     if (len < 0)
    495         return NULL;
    496 
    497     if (mi == MI_PACKAGE) {
    498         path[len] = SEP;
    499         strcpy(path + len + 1, "__init__.py");
    500     }
    501     else
    502         strcpy(path + len, ".py");
    503 
    504     toc_entry = PyDict_GetItemString(self->files, path);
    505     if (toc_entry != NULL)
    506         return get_data(PyString_AsString(self->archive), toc_entry);
    507 
    508     /* we have the module, but no source */
    509     Py_INCREF(Py_None);
    510     return Py_None;
    511 }
    512 
/* Docstrings for the zipimporter methods; each one is referenced from
   the matching row of zipimporter_methods. */
PyDoc_STRVAR(doc_find_module,
"find_module(fullname, path=None) -> self or None.\n\
\n\
Search for a module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the zipimporter\n\
instance itself if the module was found, or None if it wasn't.\n\
The optional 'path' argument is ignored -- it's there for compatibility\n\
with the importer protocol.");

PyDoc_STRVAR(doc_load_module,
"load_module(fullname) -> module.\n\
\n\
Load the module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the imported\n\
module, or raises ZipImportError if it wasn't found.");

PyDoc_STRVAR(doc_get_data,
"get_data(pathname) -> string with file data.\n\
\n\
Return the data associated with 'pathname'. Raise IOError if\n\
the file wasn't found.");

PyDoc_STRVAR(doc_is_package,
"is_package(fullname) -> bool.\n\
\n\
Return True if the module specified by fullname is a package.\n\
Raise ZipImportError if the module couldn't be found.");

PyDoc_STRVAR(doc_get_code,
"get_code(fullname) -> code object.\n\
\n\
Return the code object for the specified module. Raise ZipImportError\n\
if the module couldn't be found.");

PyDoc_STRVAR(doc_get_source,
"get_source(fullname) -> source string.\n\
\n\
Return the source code for the specified module. Raise ZipImportError\n\
if the module couldn't be found, return None if the archive does\n\
contain the module, but has no source for it.");


PyDoc_STRVAR(doc_get_filename,
"get_filename(fullname) -> filename string.\n\
\n\
Return the filename for the specified module.");
    559 
/* Method table of the zipimporter type: Python-level name, C
   implementation, calling convention and docstring. All methods take
   their arguments as a tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"get_filename", zipimporter_get_filename, METH_VARARGS,
     doc_get_filename},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL,              NULL}   /* sentinel */
};
    577 
/* Read-only attributes exposing the three object fields of a
   ZipImporter; note that the 'files' field is published as "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
    {NULL}      /* sentinel */
};
    584 
    585 PyDoc_STRVAR(zipimporter_doc,
    586 "zipimporter(archivepath) -> zipimporter object\n\
    587 \n\
    588 Create a new zipimporter instance. 'archivepath' must be a path to\n\
    589 a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
    590 '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
    591 valid directory inside the archive.\n\
    592 \n\
    593 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
    594 archive.\n\
    595 \n\
    596 The 'archive' attribute of zipimporter objects contains the name of the\n\
    597 zipfile targeted.");
    598 
/* Expands to 0 regardless of its argument, so the ob_type slot in the
   static initializer below starts out as 0 while the intended value
   stays visible in the source.
   NOTE(review): the slot is presumably filled in at module
   initialisation, which is outside this part of the file -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter: a GC-aware, subclassable type
   whose behaviour comes from the functions and tables defined above. */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
    643 
    644 
    645 /* implementation */
    646 
    647 /* Given a buffer, return the long that is represented by the first
    648    4 bytes, encoded as little endian. This partially reimplements
    649    marshal.c:r_long() */
    650 static long
    651 get_long(unsigned char *buf) {
    652     long x;
    653     x =  buf[0];
    654     x |= (long)buf[1] <<  8;
    655     x |= (long)buf[2] << 16;
    656     x |= (long)buf[3] << 24;
    657 #if SIZEOF_LONG > 4
    658     /* Sign extension for 64-bit machines */
    659     x |= -(x & 0x80000000L);
    660 #endif
    661     return x;
    662 }
    663 
    664 /*
    665    read_directory(archive) -> files dict (new reference)
    666 
    667    Given a path to a Zip archive, build a dict, mapping file names
    668    (local to the archive, using SEP as a separator) to toc entries.
    669 
    670    A toc_entry is a tuple:
    671 
    672        (__file__,      # value to use for __file__, available for all files
    673     compress,      # compression kind; 0 for uncompressed
    674     data_size,     # size of compressed data on disk
    675     file_size,     # size of decompressed data
    676     file_offset,   # offset of file header from start of archive
    677     time,          # mod time of file (in dos format)
    678     date,          # mod data of file (in dos format)
    679     crc,           # crc checksum of the data
    680        )
    681 
    682    Directories can be recognized by the trailing SEP in the name,
    683    data_size and file_offset are 0.
    684 */
    685 static PyObject *
    686 read_directory(char *archive)
    687 {
    688     PyObject *files = NULL;
    689     FILE *fp;
    690     long compress, crc, data_size, file_size, file_offset, date, time;
    691     long header_offset, name_size, header_size, header_position;
    692     long i, l, count;
    693     size_t length;
    694     char path[MAXPATHLEN + 5];
    695     char name[MAXPATHLEN + 5];
    696     char *p, endof_central_dir[22];
    697     long arc_offset; /* offset from beginning of file to start of zip-archive */
    698 
    699     if (strlen(archive) > MAXPATHLEN) {
    700         PyErr_SetString(PyExc_OverflowError,
    701                         "Zip path name is too long");
    702         return NULL;
    703     }
    704     strcpy(path, archive);
    705 
    706     fp = fopen(archive, "rb");
    707     if (fp == NULL) {
    708         PyErr_Format(ZipImportError, "can't open Zip file: "
    709                      "'%.200s'", archive);
    710         return NULL;
    711     }
    712 
    713     if (fseek(fp, -22, SEEK_END) == -1) {
    714         fclose(fp);
    715         PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
    716         return NULL;
    717     }
    718     header_position = ftell(fp);
    719     if (fread(endof_central_dir, 1, 22, fp) != 22) {
    720         fclose(fp);
    721         PyErr_Format(ZipImportError, "can't read Zip file: "
    722                      "'%.200s'", archive);
    723         return NULL;
    724     }
    725     if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
    726         /* Bad: End of Central Dir signature */
    727         fclose(fp);
    728         PyErr_Format(ZipImportError, "not a Zip file: "
    729                      "'%.200s'", archive);
    730         return NULL;
    731     }
    732 
    733     header_size = get_long((unsigned char *)endof_central_dir + 12);
    734     header_offset = get_long((unsigned char *)endof_central_dir + 16);
    735     arc_offset = header_position - header_offset - header_size;
    736     header_offset += arc_offset;
    737 
    738     files = PyDict_New();
    739     if (files == NULL)
    740         goto error;
    741 
    742     length = (long)strlen(path);
    743     path[length] = SEP;
    744 
    745     /* Start of Central Directory */
    746     count = 0;
    747     for (;;) {
    748         PyObject *t;
    749         int err;
    750 
    751         if (fseek(fp, header_offset, 0) == -1)  /* Start of file header */
    752             goto fseek_error;
    753         l = PyMarshal_ReadLongFromFile(fp);
    754         if (l != 0x02014B50)
    755             break;              /* Bad: Central Dir File Header */
    756         if (fseek(fp, header_offset + 10, 0) == -1)
    757             goto fseek_error;
    758         compress = PyMarshal_ReadShortFromFile(fp);
    759         time = PyMarshal_ReadShortFromFile(fp);
    760         date = PyMarshal_ReadShortFromFile(fp);
    761         crc = PyMarshal_ReadLongFromFile(fp);
    762         data_size = PyMarshal_ReadLongFromFile(fp);
    763         file_size = PyMarshal_ReadLongFromFile(fp);
    764         name_size = PyMarshal_ReadShortFromFile(fp);
    765         header_size = 46 + name_size +
    766            PyMarshal_ReadShortFromFile(fp) +
    767            PyMarshal_ReadShortFromFile(fp);
    768         if (fseek(fp, header_offset + 42, 0) == -1)
    769             goto fseek_error;
    770         file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
    771         if (name_size > MAXPATHLEN)
    772             name_size = MAXPATHLEN;
    773 
    774         p = name;
    775         for (i = 0; i < name_size; i++) {
    776             *p = (char)getc(fp);
    777             if (*p == '/')
    778                 *p = SEP;
    779             p++;
    780         }
    781         *p = 0;         /* Add terminating null byte */
    782         header_offset += header_size;
    783 
    784         strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
    785 
    786         t = Py_BuildValue("siiiiiii", path, compress, data_size,
    787                           file_size, file_offset, time, date, crc);
    788         if (t == NULL)
    789             goto error;
    790         err = PyDict_SetItemString(files, name, t);
    791         Py_DECREF(t);
    792         if (err != 0)
    793             goto error;
    794         count++;
    795     }
    796     fclose(fp);
    797     if (Py_VerboseFlag)
    798         PySys_WriteStderr("# zipimport: found %ld names in %s\n",
    799             count, archive);
    800     return files;
    801 fseek_error:
    802     fclose(fp);
    803     Py_XDECREF(files);
    804     PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
    805     return NULL;
    806 error:
    807     fclose(fp);
    808     Py_XDECREF(files);
    809     return NULL;
    810 }
    811 
    812 /* Return the zlib.decompress function object, or NULL if zlib couldn't
    813    be imported. The function is cached when found, so subsequent calls
    814    don't import zlib again. */
    815 static PyObject *
    816 get_decompress_func(void)
    817 {
    818     static int importing_zlib = 0;
    819     PyObject *zlib;
    820     PyObject *decompress;
    821 
    822     if (importing_zlib != 0)
    823         /* Someone has a zlib.py[co] in their Zip file;
    824            let's avoid a stack overflow. */
    825         return NULL;
    826     importing_zlib = 1;
    827     zlib = PyImport_ImportModuleNoBlock("zlib");
    828     importing_zlib = 0;
    829     if (zlib != NULL) {
    830         decompress = PyObject_GetAttrString(zlib,
    831                                             "decompress");
    832         Py_DECREF(zlib);
    833     }
    834     else {
    835         PyErr_Clear();
    836         decompress = NULL;
    837     }
    838     if (Py_VerboseFlag)
    839         PySys_WriteStderr("# zipimport: zlib %s\n",
    840             zlib != NULL ? "available": "UNAVAILABLE");
    841     return decompress;
    842 }
    843 
    844 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
    845    data as a new reference. */
    846 static PyObject *
    847 get_data(char *archive, PyObject *toc_entry)
    848 {
    849     PyObject *raw_data, *data = NULL, *decompress;
    850     char *buf;
    851     FILE *fp;
    852     int err;
    853     Py_ssize_t bytes_read = 0;
    854     long l;
    855     char *datapath;
    856     long compress, data_size, file_size, file_offset;
    857     long time, date, crc;
    858 
    859     if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
    860                           &data_size, &file_size, &file_offset, &time,
    861                           &date, &crc)) {
    862         return NULL;
    863     }
    864 
    865     fp = fopen(archive, "rb");
    866     if (!fp) {
    867         PyErr_Format(PyExc_IOError,
    868            "zipimport: can not open file %s", archive);
    869         return NULL;
    870     }
    871 
    872     /* Check to make sure the local file header is correct */
    873     if (fseek(fp, file_offset, 0) == -1) {
    874         fclose(fp);
    875         PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
    876         return NULL;
    877     }
    878 
    879     l = PyMarshal_ReadLongFromFile(fp);
    880     if (l != 0x04034B50) {
    881         /* Bad: Local File Header */
    882         PyErr_Format(ZipImportError,
    883                      "bad local file header in %s",
    884                      archive);
    885         fclose(fp);
    886         return NULL;
    887     }
    888     if (fseek(fp, file_offset + 26, 0) == -1) {
    889         fclose(fp);
    890         PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
    891         return NULL;
    892     }
    893 
    894     l = 30 + PyMarshal_ReadShortFromFile(fp) +
    895         PyMarshal_ReadShortFromFile(fp);        /* local header size */
    896     file_offset += l;           /* Start of file data */
    897 
    898     raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
    899                                           data_size : data_size + 1);
    900     if (raw_data == NULL) {
    901         fclose(fp);
    902         return NULL;
    903     }
    904     buf = PyString_AsString(raw_data);
    905 
    906     err = fseek(fp, file_offset, 0);
    907     if (err == 0) {
    908         bytes_read = fread(buf, 1, data_size, fp);
    909     } else {
    910         fclose(fp);
    911         PyErr_Format(ZipImportError, "can't read Zip file: %s", archive);
    912         return NULL;
    913     }
    914     fclose(fp);
    915     if (err || bytes_read != data_size) {
    916         PyErr_SetString(PyExc_IOError,
    917                         "zipimport: can't read data");
    918         Py_DECREF(raw_data);
    919         return NULL;
    920     }
    921 
    922     if (compress != 0) {
    923         buf[data_size] = 'Z';  /* saw this in zipfile.py */
    924         data_size++;
    925     }
    926     buf[data_size] = '\0';
    927 
    928     if (compress == 0)  /* data is not compressed */
    929         return raw_data;
    930 
    931     /* Decompress with zlib */
    932     decompress = get_decompress_func();
    933     if (decompress == NULL) {
    934         PyErr_SetString(ZipImportError,
    935                         "can't decompress data; "
    936                         "zlib not available");
    937         goto error;
    938     }
    939     data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
    940     Py_DECREF(decompress);
    941 error:
    942     Py_DECREF(raw_data);
    943     return data;
    944 }
    945 
    946 /* Lenient date/time comparison function. The precision of the mtime
    947    in the archive is lower than the mtime stored in a .pyc: we
    948    must allow a difference of at most one second. */
    949 static int
    950 eq_mtime(time_t t1, time_t t2)
    951 {
    952     time_t d = t1 - t2;
    953     if (d < 0)
    954         d = -d;
    955     /* dostime only stores even seconds, so be lenient */
    956     return d <= 1;
    957 }
    958 
    959 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
    960    and return the code object. Return None if it the magic word doesn't
    961    match (we do this instead of raising an exception as we fall back
    962    to .py if available and we don't want to mask other errors).
    963    Returns a new reference. */
    964 static PyObject *
    965 unmarshal_code(char *pathname, PyObject *data, time_t mtime)
    966 {
    967     PyObject *code;
    968     char *buf = PyString_AsString(data);
    969     Py_ssize_t size = PyString_Size(data);
    970 
    971     if (size <= 9) {
    972         PyErr_SetString(ZipImportError,
    973                         "bad pyc data");
    974         return NULL;
    975     }
    976 
    977     if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
    978         if (Py_VerboseFlag)
    979             PySys_WriteStderr("# %s has bad magic\n",
    980                               pathname);
    981         Py_INCREF(Py_None);
    982         return Py_None;  /* signal caller to try alternative */
    983     }
    984 
    985     if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
    986                                 mtime)) {
    987         if (Py_VerboseFlag)
    988             PySys_WriteStderr("# %s has bad mtime\n",
    989                               pathname);
    990         Py_INCREF(Py_None);
    991         return Py_None;  /* signal caller to try alternative */
    992     }
    993 
    994     code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
    995     if (code == NULL)
    996         return NULL;
    997     if (!PyCode_Check(code)) {
    998         Py_DECREF(code);
    999         PyErr_Format(PyExc_TypeError,
   1000              "compiled module %.200s is not a code object",
   1001              pathname);
   1002         return NULL;
   1003     }
   1004     return code;
   1005 }
   1006 
   1007 /* Replace any occurances of "\r\n?" in the input string with "\n".
   1008    This converts DOS and Mac line endings to Unix line endings.
   1009    Also append a trailing "\n" to be compatible with
   1010    PyParser_SimpleParseFile(). Returns a new reference. */
   1011 static PyObject *
   1012 normalize_line_endings(PyObject *source)
   1013 {
   1014     char *buf, *q, *p = PyString_AsString(source);
   1015     PyObject *fixed_source;
   1016 
   1017     if (!p)
   1018         return NULL;
   1019 
   1020     /* one char extra for trailing \n and one for terminating \0 */
   1021     buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
   1022     if (buf == NULL) {
   1023         PyErr_SetString(PyExc_MemoryError,
   1024                         "zipimport: no memory to allocate "
   1025                         "source buffer");
   1026         return NULL;
   1027     }
   1028     /* replace "\r\n?" by "\n" */
   1029     for (q = buf; *p != '\0'; p++) {
   1030         if (*p == '\r') {
   1031             *q++ = '\n';
   1032             if (*(p + 1) == '\n')
   1033                 p++;
   1034         }
   1035         else
   1036             *q++ = *p;
   1037     }
   1038     *q++ = '\n';  /* add trailing \n */
   1039     *q = '\0';
   1040     fixed_source = PyString_FromString(buf);
   1041     PyMem_Free(buf);
   1042     return fixed_source;
   1043 }
   1044 
   1045 /* Given a string buffer containing Python source code, compile it
   1046    return and return a code object as a new reference. */
   1047 static PyObject *
   1048 compile_source(char *pathname, PyObject *source)
   1049 {
   1050     PyObject *code, *fixed_source;
   1051 
   1052     fixed_source = normalize_line_endings(source);
   1053     if (fixed_source == NULL)
   1054         return NULL;
   1055 
   1056     code = Py_CompileString(PyString_AsString(fixed_source), pathname,
   1057                             Py_file_input);
   1058     Py_DECREF(fixed_source);
   1059     return code;
   1060 }
   1061 
   1062 /* Convert the date/time values found in the Zip archive to a value
   1063    that's compatible with the time stamp stored in .pyc files. */
   1064 static time_t
   1065 parse_dostime(int dostime, int dosdate)
   1066 {
   1067     struct tm stm;
   1068 
   1069     memset((void *) &stm, '\0', sizeof(stm));
   1070 
   1071     stm.tm_sec   =  (dostime        & 0x1f) * 2;
   1072     stm.tm_min   =  (dostime >> 5)  & 0x3f;
   1073     stm.tm_hour  =  (dostime >> 11) & 0x1f;
   1074     stm.tm_mday  =   dosdate        & 0x1f;
   1075     stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
   1076     stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
   1077     stm.tm_isdst =   -1; /* wday/yday is ignored */
   1078 
   1079     return mktime(&stm);
   1080 }
   1081 
   1082 /* Given a path to a .pyc or .pyo file in the archive, return the
   1083    modification time of the matching .py file, or 0 if no source
   1084    is available. */
   1085 static time_t
   1086 get_mtime_of_source(ZipImporter *self, char *path)
   1087 {
   1088     PyObject *toc_entry;
   1089     time_t mtime = 0;
   1090     Py_ssize_t lastchar = strlen(path) - 1;
   1091     char savechar = path[lastchar];
   1092     path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
   1093     toc_entry = PyDict_GetItemString(self->files, path);
   1094     if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
   1095         PyTuple_Size(toc_entry) == 8) {
   1096         /* fetch the time stamp of the .py file for comparison
   1097            with an embedded pyc time stamp */
   1098         int time, date;
   1099         time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
   1100         date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
   1101         mtime = parse_dostime(time, date);
   1102     }
   1103     path[lastchar] = savechar;
   1104     return mtime;
   1105 }
   1106 
   1107 /* Return the code object for the module named by 'fullname' from the
   1108    Zip archive as a new reference. */
   1109 static PyObject *
   1110 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
   1111                    time_t mtime, PyObject *toc_entry)
   1112 {
   1113     PyObject *data, *code;
   1114     char *modpath;
   1115     char *archive = PyString_AsString(self->archive);
   1116 
   1117     if (archive == NULL)
   1118         return NULL;
   1119 
   1120     data = get_data(archive, toc_entry);
   1121     if (data == NULL)
   1122         return NULL;
   1123 
   1124     modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
   1125 
   1126     if (isbytecode) {
   1127         code = unmarshal_code(modpath, data, mtime);
   1128     }
   1129     else {
   1130         code = compile_source(modpath, data);
   1131     }
   1132     Py_DECREF(data);
   1133     return code;
   1134 }
   1135 
   1136 /* Get the code object associated with the module specified by
   1137    'fullname'. */
   1138 static PyObject *
   1139 get_module_code(ZipImporter *self, char *fullname,
   1140                 int *p_ispackage, char **p_modpath)
   1141 {
   1142     PyObject *toc_entry;
   1143     char *subname, path[MAXPATHLEN + 1];
   1144     int len;
   1145     struct st_zip_searchorder *zso;
   1146 
   1147     subname = get_subname(fullname);
   1148 
   1149     len = make_filename(PyString_AsString(self->prefix), subname, path);
   1150     if (len < 0)
   1151         return NULL;
   1152 
   1153     for (zso = zip_searchorder; *zso->suffix; zso++) {
   1154         PyObject *code = NULL;
   1155 
   1156         strcpy(path + len, zso->suffix);
   1157         if (Py_VerboseFlag > 1)
   1158             PySys_WriteStderr("# trying %s%c%s\n",
   1159                               PyString_AsString(self->archive),
   1160                               SEP, path);
   1161         toc_entry = PyDict_GetItemString(self->files, path);
   1162         if (toc_entry != NULL) {
   1163             time_t mtime = 0;
   1164             int ispackage = zso->type & IS_PACKAGE;
   1165             int isbytecode = zso->type & IS_BYTECODE;
   1166 
   1167             if (isbytecode)
   1168                 mtime = get_mtime_of_source(self, path);
   1169             if (p_ispackage != NULL)
   1170                 *p_ispackage = ispackage;
   1171             code = get_code_from_data(self, ispackage,
   1172                                       isbytecode, mtime,
   1173                                       toc_entry);
   1174             if (code == Py_None) {
   1175                 /* bad magic number or non-matching mtime
   1176                    in byte code, try next */
   1177                 Py_DECREF(code);
   1178                 continue;
   1179             }
   1180             if (code != NULL && p_modpath != NULL)
   1181                 *p_modpath = PyString_AsString(
   1182                     PyTuple_GetItem(toc_entry, 0));
   1183             return code;
   1184         }
   1185     }
   1186     PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
   1187     return NULL;
   1188 }
   1189 
   1190 
   1191 /* Module init */
   1192 
   1193 PyDoc_STRVAR(zipimport_doc,
   1194 "zipimport provides support for importing Python modules from Zip archives.\n\
   1195 \n\
   1196 This module exports three objects:\n\
   1197 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
   1198 - ZipImportError: exception raised by zipimporter objects. It's a\n\
   1199   subclass of ImportError, so it can be caught as ImportError, too.\n\
   1200 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
   1201   info dicts, as used in zipimporter._files.\n\
   1202 \n\
   1203 It is usually not needed to use the zipimport module explicitly; it is\n\
   1204 used by the builtin import mechanism for sys.path items that are paths\n\
   1205 to Zip archives.");
   1206 
   1207 PyMODINIT_FUNC
   1208 initzipimport(void)
   1209 {
   1210     PyObject *mod;
   1211 
   1212     if (PyType_Ready(&ZipImporter_Type) < 0)
   1213         return;
   1214 
   1215     /* Correct directory separator */
   1216     zip_searchorder[0].suffix[0] = SEP;
   1217     zip_searchorder[1].suffix[0] = SEP;
   1218     zip_searchorder[2].suffix[0] = SEP;
   1219     if (Py_OptimizeFlag) {
   1220         /* Reverse *.pyc and *.pyo */
   1221         struct st_zip_searchorder tmp;
   1222         tmp = zip_searchorder[0];
   1223         zip_searchorder[0] = zip_searchorder[1];
   1224         zip_searchorder[1] = tmp;
   1225         tmp = zip_searchorder[3];
   1226         zip_searchorder[3] = zip_searchorder[4];
   1227         zip_searchorder[4] = tmp;
   1228     }
   1229 
   1230     mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
   1231                          NULL, PYTHON_API_VERSION);
   1232     if (mod == NULL)
   1233         return;
   1234 
   1235     ZipImportError = PyErr_NewException("zipimport.ZipImportError",
   1236                                         PyExc_ImportError, NULL);
   1237     if (ZipImportError == NULL)
   1238         return;
   1239 
   1240     Py_INCREF(ZipImportError);
   1241     if (PyModule_AddObject(mod, "ZipImportError",
   1242                            ZipImportError) < 0)
   1243         return;
   1244 
   1245     Py_INCREF(&ZipImporter_Type);
   1246     if (PyModule_AddObject(mod, "zipimporter",
   1247                            (PyObject *)&ZipImporter_Type) < 0)
   1248         return;
   1249 
   1250     zip_directory_cache = PyDict_New();
   1251     if (zip_directory_cache == NULL)
   1252         return;
   1253     Py_INCREF(zip_directory_cache);
   1254     if (PyModule_AddObject(mod, "_zip_directory_cache",
   1255                            zip_directory_cache) < 0)
   1256         return;
   1257 }
   1258