Home | History | Annotate | Download | only in Lib
      1 """Utilities to support packages."""
      2 
      3 from collections import namedtuple
      4 from functools import singledispatch as simplegeneric
      5 import importlib
      6 import importlib.util
      7 import importlib.machinery
      8 import os
      9 import os.path
     10 import sys
     11 from types import ModuleType
     12 import warnings
     13 
# Names exported when a caller does a star-import of this module.
__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
    'ModuleInfo',
]
     20 
     21 
     22 ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg')
     23 ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
     24 
     25 
     26 def _get_spec(finder, name):
     27     """Return the finder-specific module spec."""
     28     # Works with legacy finders.
     29     try:
     30         find_spec = finder.find_spec
     31     except AttributeError:
     32         loader = finder.find_module(name)
     33         if loader is None:
     34             return None
     35         return importlib.util.spec_from_loader(name, loader)
     36     else:
     37         return find_spec(name)
     38 
     39 
     40 def read_code(stream):
     41     # This helper is needed in order for the PEP 302 emulation to
     42     # correctly handle compiled files
     43     import marshal
     44 
     45     magic = stream.read(4)
     46     if magic != importlib.util.MAGIC_NUMBER:
     47         return None
     48 
     49     stream.read(8) # Skip timestamp and size
     50     return marshal.load(stream)
     51 
     52 
     53 def walk_packages(path=None, prefix='', onerror=None):
     54     """Yields ModuleInfo for all modules recursively
     55     on path, or, if path is None, all accessible modules.
     56 
     57     'path' should be either None or a list of paths to look for
     58     modules in.
     59 
     60     'prefix' is a string to output on the front of every module name
     61     on output.
     62 
     63     Note that this function must import all *packages* (NOT all
     64     modules!) on the given path, in order to access the __path__
     65     attribute to find submodules.
     66 
     67     'onerror' is a function which gets called with one argument (the
     68     name of the package which was being imported) if any exception
     69     occurs while trying to import a package.  If no onerror function is
     70     supplied, ImportErrors are caught and ignored, while all other
     71     exceptions are propagated, terminating the search.
     72 
     73     Examples:
     74 
     75     # list all modules python can access
     76     walk_packages()
     77 
     78     # list all submodules of ctypes
     79     walk_packages(ctypes.__path__, ctypes.__name__+'.')
     80     """
     81 
     82     def seen(p, m={}):
     83         if p in m:
     84             return True
     85         m[p] = True
     86 
     87     for info in iter_modules(path, prefix):
     88         yield info
     89 
     90         if info.ispkg:
     91             try:
     92                 __import__(info.name)
     93             except ImportError:
     94                 if onerror is not None:
     95                     onerror(info.name)
     96             except Exception:
     97                 if onerror is not None:
     98                     onerror(info.name)
     99                 else:
    100                     raise
    101             else:
    102                 path = getattr(sys.modules[info.name], '__path__', None) or []
    103 
    104                 # don't traverse path items we've seen before
    105                 path = [p for p in path if not seen(p)]
    106 
    107                 yield from walk_packages(path, info.name+'.', onerror)
    108 
    109 
    110 def iter_modules(path=None, prefix=''):
    111     """Yields ModuleInfo for all submodules on path,
    112     or, if path is None, all top-level modules on sys.path.
    113 
    114     'path' should be either None or a list of paths to look for
    115     modules in.
    116 
    117     'prefix' is a string to output on the front of every module name
    118     on output.
    119     """
    120     if path is None:
    121         importers = iter_importers()
    122     else:
    123         importers = map(get_importer, path)
    124 
    125     yielded = {}
    126     for i in importers:
    127         for name, ispkg in iter_importer_modules(i, prefix):
    128             if name not in yielded:
    129                 yielded[name] = 1
    130                 yield ModuleInfo(i, name, ispkg)
    131 
    132 
    133 @simplegeneric
    134 def iter_importer_modules(importer, prefix=''):
    135     if not hasattr(importer, 'iter_modules'):
    136         return []
    137     return importer.iter_modules(prefix)
    138 
    139 
    140 # Implement a file walker for the normal importlib path hook
    141 def _iter_file_finder_modules(importer, prefix=''):
    142     if importer.path is None or not os.path.isdir(importer.path):
    143         return
    144 
    145     yielded = {}
    146     import inspect
    147     try:
    148         filenames = os.listdir(importer.path)
    149     except OSError:
    150         # ignore unreadable directories like import does
    151         filenames = []
    152     filenames.sort()  # handle packages before same-named modules
    153 
    154     for fn in filenames:
    155         modname = inspect.getmodulename(fn)
    156         if modname=='__init__' or modname in yielded:
    157             continue
    158 
    159         path = os.path.join(importer.path, fn)
    160         ispkg = False
    161 
    162         if not modname and os.path.isdir(path) and '.' not in fn:
    163             modname = fn
    164             try:
    165                 dircontents = os.listdir(path)
    166             except OSError:
    167                 # ignore unreadable directories like import does
    168                 dircontents = []
    169             for fn in dircontents:
    170                 subname = inspect.getmodulename(fn)
    171                 if subname=='__init__':
    172                     ispkg = True
    173                     break
    174             else:
    175                 continue    # not a package
    176 
    177         if modname and '.' not in modname:
    178             yielded[modname] = 1
    179             yield prefix + modname, ispkg
    180 
# Make iter_importer_modules() dispatch to the directory walker whenever it
# is handed an importlib.machinery.FileFinder instance.
iter_importer_modules.register(
    importlib.machinery.FileFinder, _iter_file_finder_modules)
    183 
    184 
    185 def _import_imp():
    186     global imp
    187     with warnings.catch_warnings():
    188         warnings.simplefilter('ignore', DeprecationWarning)
    189         imp = importlib.import_module('imp')
    190 
    191 class ImpImporter:
    192     """PEP 302 Finder that wraps Python's "classic" import algorithm
    193 
    194     ImpImporter(dirname) produces a PEP 302 finder that searches that
    195     directory.  ImpImporter(None) produces a PEP 302 finder that searches
    196     the current sys.path, plus any modules that are frozen or built-in.
    197 
    198     Note that ImpImporter does not currently support being used by placement
    199     on sys.meta_path.
    200     """
    201 
    202     def __init__(self, path=None):
    203         global imp
    204         warnings.warn("This emulation is deprecated, use 'importlib' instead",
    205              DeprecationWarning)
    206         _import_imp()
    207         self.path = path
    208 
    209     def find_module(self, fullname, path=None):
    210         # Note: we ignore 'path' argument since it is only used via meta_path
    211         subname = fullname.split(".")[-1]
    212         if subname != fullname and self.path is None:
    213             return None
    214         if self.path is None:
    215             path = None
    216         else:
    217             path = [os.path.realpath(self.path)]
    218         try:
    219             file, filename, etc = imp.find_module(subname, path)
    220         except ImportError:
    221             return None
    222         return ImpLoader(fullname, file, filename, etc)
    223 
    224     def iter_modules(self, prefix=''):
    225         if self.path is None or not os.path.isdir(self.path):
    226             return
    227 
    228         yielded = {}
    229         import inspect
    230         try:
    231             filenames = os.listdir(self.path)
    232         except OSError:
    233             # ignore unreadable directories like import does
    234             filenames = []
    235         filenames.sort()  # handle packages before same-named modules
    236 
    237         for fn in filenames:
    238             modname = inspect.getmodulename(fn)
    239             if modname=='__init__' or modname in yielded:
    240                 continue
    241 
    242             path = os.path.join(self.path, fn)
    243             ispkg = False
    244 
    245             if not modname and os.path.isdir(path) and '.' not in fn:
    246                 modname = fn
    247                 try:
    248                     dircontents = os.listdir(path)
    249                 except OSError:
    250                     # ignore unreadable directories like import does
    251                     dircontents = []
    252                 for fn in dircontents:
    253                     subname = inspect.getmodulename(fn)
    254                     if subname=='__init__':
    255                         ispkg = True
    256                         break
    257                 else:
    258                     continue    # not a package
    259 
    260             if modname and '.' not in modname:
    261                 yielded[modname] = 1
    262                 yield prefix + modname, ispkg
    263 
    264 
    265 class ImpLoader:
    266     """PEP 302 Loader that wraps Python's "classic" import algorithm
    267     """
    268     code = source = None
    269 
    270     def __init__(self, fullname, file, filename, etc):
    271         warnings.warn("This emulation is deprecated, use 'importlib' instead",
    272                       DeprecationWarning)
    273         _import_imp()
    274         self.file = file
    275         self.filename = filename
    276         self.fullname = fullname
    277         self.etc = etc
    278 
    279     def load_module(self, fullname):
    280         self._reopen()
    281         try:
    282             mod = imp.load_module(fullname, self.file, self.filename, self.etc)
    283         finally:
    284             if self.file:
    285                 self.file.close()
    286         # Note: we don't set __loader__ because we want the module to look
    287         # normal; i.e. this is just a wrapper for standard import machinery
    288         return mod
    289 
    290     def get_data(self, pathname):
    291         with open(pathname, "rb") as file:
    292             return file.read()
    293 
    294     def _reopen(self):
    295         if self.file and self.file.closed:
    296             mod_type = self.etc[2]
    297             if mod_type==imp.PY_SOURCE:
    298                 self.file = open(self.filename, 'r')
    299             elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
    300                 self.file = open(self.filename, 'rb')
    301 
    302     def _fix_name(self, fullname):
    303         if fullname is None:
    304             fullname = self.fullname
    305         elif fullname != self.fullname:
    306             raise ImportError("Loader for module %s cannot handle "
    307                               "module %s" % (self.fullname, fullname))
    308         return fullname
    309 
    310     def is_package(self, fullname):
    311         fullname = self._fix_name(fullname)
    312         return self.etc[2]==imp.PKG_DIRECTORY
    313 
    314     def get_code(self, fullname=None):
    315         fullname = self._fix_name(fullname)
    316         if self.code is None:
    317             mod_type = self.etc[2]
    318             if mod_type==imp.PY_SOURCE:
    319                 source = self.get_source(fullname)
    320                 self.code = compile(source, self.filename, 'exec')
    321             elif mod_type==imp.PY_COMPILED:
    322                 self._reopen()
    323                 try:
    324                     self.code = read_code(self.file)
    325                 finally:
    326                     self.file.close()
    327             elif mod_type==imp.PKG_DIRECTORY:
    328                 self.code = self._get_delegate().get_code()
    329         return self.code
    330 
    331     def get_source(self, fullname=None):
    332         fullname = self._fix_name(fullname)
    333         if self.source is None:
    334             mod_type = self.etc[2]
    335             if mod_type==imp.PY_SOURCE:
    336                 self._reopen()
    337                 try:
    338                     self.source = self.file.read()
    339                 finally:
    340                     self.file.close()
    341             elif mod_type==imp.PY_COMPILED:
    342                 if os.path.exists(self.filename[:-1]):
    343                     with open(self.filename[:-1], 'r') as f:
    344                         self.source = f.read()
    345             elif mod_type==imp.PKG_DIRECTORY:
    346                 self.source = self._get_delegate().get_source()
    347         return self.source
    348 
    349     def _get_delegate(self):
    350         finder = ImpImporter(self.filename)
    351         spec = _get_spec(finder, '__init__')
    352         return spec.loader
    353 
    354     def get_filename(self, fullname=None):
    355         fullname = self._fix_name(fullname)
    356         mod_type = self.etc[2]
    357         if mod_type==imp.PKG_DIRECTORY:
    358             return self._get_delegate().get_filename()
    359         elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
    360             return self.filename
    361         return None
    362 
    363 
    364 try:
    365     import zipimport
    366     from zipimport import zipimporter
    367 
    368     def iter_zipimport_modules(importer, prefix=''):
    369         dirlist = sorted(zipimport._zip_directory_cache[importer.archive])
    370         _prefix = importer.prefix
    371         plen = len(_prefix)
    372         yielded = {}
    373         import inspect
    374         for fn in dirlist:
    375             if not fn.startswith(_prefix):
    376                 continue
    377 
    378             fn = fn[plen:].split(os.sep)
    379 
    380             if len(fn)==2 and fn[1].startswith('__init__.py'):
    381                 if fn[0] not in yielded:
    382                     yielded[fn[0]] = 1
    383                     yield prefix + fn[0], True
    384 
    385             if len(fn)!=1:
    386                 continue
    387 
    388             modname = inspect.getmodulename(fn[0])
    389             if modname=='__init__':
    390                 continue
    391 
    392             if modname and '.' not in modname and modname not in yielded:
    393                 yielded[modname] = 1
    394                 yield prefix + modname, False
    395 
    396     iter_importer_modules.register(zipimporter, iter_zipimport_modules)
    397 
    398 except ImportError:
    399     pass
    400 
    401 
    402 def get_importer(path_item):
    403     """Retrieve a finder for the given path item
    404 
    405     The returned finder is cached in sys.path_importer_cache
    406     if it was newly created by a path hook.
    407 
    408     The cache (or part of it) can be cleared manually if a
    409     rescan of sys.path_hooks is necessary.
    410     """
    411     try:
    412         importer = sys.path_importer_cache[path_item]
    413     except KeyError:
    414         for path_hook in sys.path_hooks:
    415             try:
    416                 importer = path_hook(path_item)
    417                 sys.path_importer_cache.setdefault(path_item, importer)
    418                 break
    419             except ImportError:
    420                 pass
    421         else:
    422             importer = None
    423     return importer
    424 
    425 
    426 def iter_importers(fullname=""):
    427     """Yield finders for the given module name
    428 
    429     If fullname contains a '.', the finders will be for the package
    430     containing fullname, otherwise they will be all registered top level
    431     finders (i.e. those on both sys.meta_path and sys.path_hooks).
    432 
    433     If the named module is in a package, that package is imported as a side
    434     effect of invoking this function.
    435 
    436     If no module name is specified, all top level finders are produced.
    437     """
    438     if fullname.startswith('.'):
    439         msg = "Relative module name {!r} not supported".format(fullname)
    440         raise ImportError(msg)
    441     if '.' in fullname:
    442         # Get the containing package's __path__
    443         pkg_name = fullname.rpartition(".")[0]
    444         pkg = importlib.import_module(pkg_name)
    445         path = getattr(pkg, '__path__', None)
    446         if path is None:
    447             return
    448     else:
    449         yield from sys.meta_path
    450         path = sys.path
    451     for item in path:
    452         yield get_importer(item)
    453 
    454 
    455 def get_loader(module_or_name):
    456     """Get a "loader" object for module_or_name
    457 
    458     Returns None if the module cannot be found or imported.
    459     If the named module is not already imported, its containing package
    460     (if any) is imported, in order to establish the package __path__.
    461     """
    462     if module_or_name in sys.modules:
    463         module_or_name = sys.modules[module_or_name]
    464         if module_or_name is None:
    465             return None
    466     if isinstance(module_or_name, ModuleType):
    467         module = module_or_name
    468         loader = getattr(module, '__loader__', None)
    469         if loader is not None:
    470             return loader
    471         if getattr(module, '__spec__', None) is None:
    472             return None
    473         fullname = module.__name__
    474     else:
    475         fullname = module_or_name
    476     return find_loader(fullname)
    477 
    478 
    479 def find_loader(fullname):
    480     """Find a "loader" object for fullname
    481 
    482     This is a backwards compatibility wrapper around
    483     importlib.util.find_spec that converts most failures to ImportError
    484     and only returns the loader rather than the full spec
    485     """
    486     if fullname.startswith('.'):
    487         msg = "Relative module name {!r} not supported".format(fullname)
    488         raise ImportError(msg)
    489     try:
    490         spec = importlib.util.find_spec(fullname)
    491     except (ImportError, AttributeError, TypeError, ValueError) as ex:
    492         # This hack fixes an impedance mismatch between pkgutil and
    493         # importlib, where the latter raises other errors for cases where
    494         # pkgutil previously raised ImportError
    495         msg = "Error while finding loader for {!r} ({}: {})"
    496         raise ImportError(msg.format(fullname, type(ex), ex)) from ex
    497     return spec.loader if spec is not None else None
    498 
    499 
    500 def extend_path(path, name):
    501     """Extend a package's path.
    502 
    503     Intended use is to place the following code in a package's __init__.py:
    504 
    505         from pkgutil import extend_path
    506         __path__ = extend_path(__path__, __name__)
    507 
    508     This will add to the package's __path__ all subdirectories of
    509     directories on sys.path named after the package.  This is useful
    510     if one wants to distribute different parts of a single logical
    511     package as multiple directories.
    512 
    513     It also looks for *.pkg files beginning where * matches the name
    514     argument.  This feature is similar to *.pth files (see site.py),
    515     except that it doesn't special-case lines starting with 'import'.
    516     A *.pkg file is trusted at face value: apart from checking for
    517     duplicates, all entries found in a *.pkg file are added to the
    518     path, regardless of whether they are exist the filesystem.  (This
    519     is a feature.)
    520 
    521     If the input path is not a list (as is the case for frozen
    522     packages) it is returned unchanged.  The input path is not
    523     modified; an extended copy is returned.  Items are only appended
    524     to the copy at the end.
    525 
    526     It is assumed that sys.path is a sequence.  Items of sys.path that
    527     are not (unicode or 8-bit) strings referring to existing
    528     directories are ignored.  Unicode items of sys.path that cause
    529     errors when used as filenames may cause this function to raise an
    530     exception (in line with os.path.isdir() behavior).
    531     """
    532 
    533     if not isinstance(path, list):
    534         # This could happen e.g. when this is called from inside a
    535         # frozen package.  Return the path unchanged in that case.
    536         return path
    537 
    538     sname_pkg = name + ".pkg"
    539 
    540     path = path[:] # Start with a copy of the existing path
    541 
    542     parent_package, _, final_name = name.rpartition('.')
    543     if parent_package:
    544         try:
    545             search_path = sys.modules[parent_package].__path__
    546         except (KeyError, AttributeError):
    547             # We can't do anything: find_loader() returns None when
    548             # passed a dotted name.
    549             return path
    550     else:
    551         search_path = sys.path
    552 
    553     for dir in search_path:
    554         if not isinstance(dir, str):
    555             continue
    556 
    557         finder = get_importer(dir)
    558         if finder is not None:
    559             portions = []
    560             if hasattr(finder, 'find_spec'):
    561                 spec = finder.find_spec(final_name)
    562                 if spec is not None:
    563                     portions = spec.submodule_search_locations or []
    564             # Is this finder PEP 420 compliant?
    565             elif hasattr(finder, 'find_loader'):
    566                 _, portions = finder.find_loader(final_name)
    567 
    568             for portion in portions:
    569                 # XXX This may still add duplicate entries to path on
    570                 # case-insensitive filesystems
    571                 if portion not in path:
    572                     path.append(portion)
    573 
    574         # XXX Is this the right thing for subpackages like zope.app?
    575         # It looks for a file named "zope.app.pkg"
    576         pkgfile = os.path.join(dir, sname_pkg)
    577         if os.path.isfile(pkgfile):
    578             try:
    579                 f = open(pkgfile)
    580             except OSError as msg:
    581                 sys.stderr.write("Can't open %s: %s\n" %
    582                                  (pkgfile, msg))
    583             else:
    584                 with f:
    585                     for line in f:
    586                         line = line.rstrip('\n')
    587                         if not line or line.startswith('#'):
    588                             continue
    589                         path.append(line) # Don't check for existence!
    590 
    591     return path
    592 
    593 
    594 def get_data(package, resource):
    595     """Get a resource from a package.
    596 
    597     This is a wrapper round the PEP 302 loader get_data API. The package
    598     argument should be the name of a package, in standard module format
    599     (foo.bar). The resource argument should be in the form of a relative
    600     filename, using '/' as the path separator. The parent directory name '..'
    601     is not allowed, and nor is a rooted name (starting with a '/').
    602 
    603     The function returns a binary string, which is the contents of the
    604     specified resource.
    605 
    606     For packages located in the filesystem, which have already been imported,
    607     this is the rough equivalent of
    608 
    609         d = os.path.dirname(sys.modules[package].__file__)
    610         data = open(os.path.join(d, resource), 'rb').read()
    611 
    612     If the package cannot be located or loaded, or it uses a PEP 302 loader
    613     which does not support get_data(), then None is returned.
    614     """
    615 
    616     spec = importlib.util.find_spec(package)
    617     if spec is None:
    618         return None
    619     loader = spec.loader
    620     if loader is None or not hasattr(loader, 'get_data'):
    621         return None
    622     # XXX needs test
    623     mod = (sys.modules.get(package) or
    624            importlib._bootstrap._load(spec))
    625     if mod is None or not hasattr(mod, '__file__'):
    626         return None
    627 
    628     # Modify the resource name to be compatible with the loader.get_data
    629     # signature - an os.path format "filename" starting with the dirname of
    630     # the package's __file__
    631     parts = resource.split('/')
    632     parts.insert(0, os.path.dirname(mod.__file__))
    633     resource_name = os.path.join(*parts)
    634     return loader.get_data(resource_name)
    635