Home | History | Annotate | Download | only in Lib
      1 """Find modules used by a script, using introspection."""
      2 
      3 import dis
      4 import importlib._bootstrap_external
      5 import importlib.machinery
      6 import marshal
      7 import os
      8 import sys
      9 import types
     10 import struct
     11 import warnings
     12 with warnings.catch_warnings():
     13     warnings.simplefilter('ignore', DeprecationWarning)
     14     import imp
     15 
     16 LOAD_CONST = dis.opmap['LOAD_CONST']
     17 IMPORT_NAME = dis.opmap['IMPORT_NAME']
     18 STORE_NAME = dis.opmap['STORE_NAME']
     19 STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
     20 STORE_OPS = STORE_NAME, STORE_GLOBAL
     21 EXTENDED_ARG = dis.EXTENDED_ARG
     22 
# Modulefinder does a good job at simulating Python's import machinery,
# but it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note: this is a mapping from package names to lists of paths.
     29 packagePathMap = {}
     30 
     31 # A Public interface
     32 def AddPackagePath(packagename, path):
     33     packagePathMap.setdefault(packagename, []).append(path)
     34 
     35 replacePackageMap = {}
     36 
     37 # This ReplacePackage mechanism allows modulefinder to work around
     38 # situations in which a package injects itself under the name
     39 # of another package into sys.modules at runtime by calling
     40 # ReplacePackage("real_package_name", "faked_package_name")
     41 # before running ModuleFinder.
     42 
     43 def ReplacePackage(oldname, newname):
     44     replacePackageMap[oldname] = newname
     45 
     46 
     47 class Module:
     48 
     49     def __init__(self, name, file=None, path=None):
     50         self.__name__ = name
     51         self.__file__ = file
     52         self.__path__ = path
     53         self.__code__ = None
     54         # The set of global names that are assigned to in the module.
     55         # This includes those names imported through starimports of
     56         # Python modules.
     57         self.globalnames = {}
     58         # The set of starimports this module did that could not be
     59         # resolved, ie. a starimport from a non-Python module.
     60         self.starimports = {}
     61 
     62     def __repr__(self):
     63         s = "Module(%r" % (self.__name__,)
     64         if self.__file__ is not None:
     65             s = s + ", %r" % (self.__file__,)
     66         if self.__path__ is not None:
     67             s = s + ", %r" % (self.__path__,)
     68         s = s + ")"
     69         return s
     70 
     71 class ModuleFinder:
     72 
     73     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
     74         if path is None:
     75             path = sys.path
     76         self.path = path
     77         self.modules = {}
     78         self.badmodules = {}
     79         self.debug = debug
     80         self.indent = 0
     81         self.excludes = excludes
     82         self.replace_paths = replace_paths
     83         self.processed_paths = []   # Used in debugging only
     84 
     85     def msg(self, level, str, *args):
     86         if level <= self.debug:
     87             for i in range(self.indent):
     88                 print("   ", end=' ')
     89             print(str, end=' ')
     90             for arg in args:
     91                 print(repr(arg), end=' ')
     92             print()
     93 
     94     def msgin(self, *args):
     95         level = args[0]
     96         if level <= self.debug:
     97             self.indent = self.indent + 1
     98             self.msg(*args)
     99 
    100     def msgout(self, *args):
    101         level = args[0]
    102         if level <= self.debug:
    103             self.indent = self.indent - 1
    104             self.msg(*args)
    105 
    def run_script(self, pathname):
        """Analyse the script at *pathname* as the module ``__main__``.

        The source is handed to load_module(), which compiles it and scans
        it for imports.
        """
        # Imported here because this module does not import tokenize at the
        # top of the file.
        import tokenize

        self.msg(2, "run_script", pathname)
        # tokenize.open() honours the file's BOM / coding cookie (UTF-8 when
        # neither is present).  A plain open() would decode with the locale
        # encoding and could fail on, or mis-read, valid source files.
        with tokenize.open(pathname) as fp:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)
    111 
    def load_file(self, pathname):
        """Analyse the source file at *pathname* as a top-level module.

        The module name is the file's base name without its extension; the
        extension is passed on as the suffix in the file-info triple.
        """
        # Imported here because this module does not import tokenize at the
        # top of the file.
        import tokenize

        name, ext = os.path.splitext(os.path.basename(pathname))
        # tokenize.open() honours the file's BOM / coding cookie (UTF-8 when
        # neither is present).  A plain open() would decode with the locale
        # encoding and could fail on, or mis-read, valid source files.
        with tokenize.open(pathname) as fp:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)
    118 
    119     def import_hook(self, name, caller=None, fromlist=None, level=-1):
    120         self.msg(3, "import_hook", name, caller, fromlist, level)
    121         parent = self.determine_parent(caller, level=level)
    122         q, tail = self.find_head_package(parent, name)
    123         m = self.load_tail(q, tail)
    124         if not fromlist:
    125             return q
    126         if m.__path__:
    127             self.ensure_fromlist(m, fromlist)
    128         return None
    129 
    130     def determine_parent(self, caller, level=-1):
    131         self.msgin(4, "determine_parent", caller, level)
    132         if not caller or level == 0:
    133             self.msgout(4, "determine_parent -> None")
    134             return None
    135         pname = caller.__name__
    136         if level >= 1: # relative import
    137             if caller.__path__:
    138                 level -= 1
    139             if level == 0:
    140                 parent = self.modules[pname]
    141                 assert parent is caller
    142                 self.msgout(4, "determine_parent ->", parent)
    143                 return parent
    144             if pname.count(".") < level:
    145                 raise ImportError("relative importpath too deep")
    146             pname = ".".join(pname.split(".")[:-level])
    147             parent = self.modules[pname]
    148             self.msgout(4, "determine_parent ->", parent)
    149             return parent
    150         if caller.__path__:
    151             parent = self.modules[pname]
    152             assert caller is parent
    153             self.msgout(4, "determine_parent ->", parent)
    154             return parent
    155         if '.' in pname:
    156             i = pname.rfind('.')
    157             pname = pname[:i]
    158             parent = self.modules[pname]
    159             assert parent.__name__ == pname
    160             self.msgout(4, "determine_parent ->", parent)
    161             return parent
    162         self.msgout(4, "determine_parent -> None")
    163         return None
    164 
    165     def find_head_package(self, parent, name):
    166         self.msgin(4, "find_head_package", parent, name)
    167         if '.' in name:
    168             i = name.find('.')
    169             head = name[:i]
    170             tail = name[i+1:]
    171         else:
    172             head = name
    173             tail = ""
    174         if parent:
    175             qname = "%s.%s" % (parent.__name__, head)
    176         else:
    177             qname = head
    178         q = self.import_module(head, qname, parent)
    179         if q:
    180             self.msgout(4, "find_head_package ->", (q, tail))
    181             return q, tail
    182         if parent:
    183             qname = head
    184             parent = None
    185             q = self.import_module(head, qname, parent)
    186             if q:
    187                 self.msgout(4, "find_head_package ->", (q, tail))
    188                 return q, tail
    189         self.msgout(4, "raise ImportError: No module named", qname)
    190         raise ImportError("No module named " + qname)
    191 
    192     def load_tail(self, q, tail):
    193         self.msgin(4, "load_tail", q, tail)
    194         m = q
    195         while tail:
    196             i = tail.find('.')
    197             if i < 0: i = len(tail)
    198             head, tail = tail[:i], tail[i+1:]
    199             mname = "%s.%s" % (m.__name__, head)
    200             m = self.import_module(head, mname, m)
    201             if not m:
    202                 self.msgout(4, "raise ImportError: No module named", mname)
    203                 raise ImportError("No module named " + mname)
    204         self.msgout(4, "load_tail ->", m)
    205         return m
    206 
    207     def ensure_fromlist(self, m, fromlist, recursive=0):
    208         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
    209         for sub in fromlist:
    210             if sub == "*":
    211                 if not recursive:
    212                     all = self.find_all_submodules(m)
    213                     if all:
    214                         self.ensure_fromlist(m, all, 1)
    215             elif not hasattr(m, sub):
    216                 subname = "%s.%s" % (m.__name__, sub)
    217                 submod = self.import_module(sub, subname, m)
    218                 if not submod:
    219                     raise ImportError("No module named " + subname)
    220 
    221     def find_all_submodules(self, m):
    222         if not m.__path__:
    223             return
    224         modules = {}
    225         # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
    226         # But we must also collect Python extension modules - although
    227         # we cannot separate normal dlls from Python extensions.
    228         suffixes = []
    229         suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
    230         suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
    231         suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
    232         for dir in m.__path__:
    233             try:
    234                 names = os.listdir(dir)
    235             except OSError:
    236                 self.msg(2, "can't list directory", dir)
    237                 continue
    238             for name in names:
    239                 mod = None
    240                 for suff in suffixes:
    241                     n = len(suff)
    242                     if name[-n:] == suff:
    243                         mod = name[:-n]
    244                         break
    245                 if mod and mod != "__init__":
    246                     modules[mod] = mod
    247         return modules.keys()
    248 
    249     def import_module(self, partname, fqname, parent):
    250         self.msgin(3, "import_module", partname, fqname, parent)
    251         try:
    252             m = self.modules[fqname]
    253         except KeyError:
    254             pass
    255         else:
    256             self.msgout(3, "import_module ->", m)
    257             return m
    258         if fqname in self.badmodules:
    259             self.msgout(3, "import_module -> None")
    260             return None
    261         if parent and parent.__path__ is None:
    262             self.msgout(3, "import_module -> None")
    263             return None
    264         try:
    265             fp, pathname, stuff = self.find_module(partname,
    266                                                    parent and parent.__path__, parent)
    267         except ImportError:
    268             self.msgout(3, "import_module ->", None)
    269             return None
    270         try:
    271             m = self.load_module(fqname, fp, pathname, stuff)
    272         finally:
    273             if fp:
    274                 fp.close()
    275         if parent:
    276             setattr(parent, partname, m)
    277         self.msgout(3, "import_module ->", m)
    278         return m
    279 
    def load_module(self, fqname, fp, pathname, file_info):
        """Record module *fqname* and scan its code for further imports.

        *file_info* is a ``(suffix, mode, type)`` triple; only the type is
        used.  Package directories are delegated to load_package(), source
        is compiled, bytecode is unmarshalled after header validation, and
        any other type is recorded without code.  Returns the module
        record.
        """
        _suffix, _mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == imp.PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        co = None
        if kind == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif kind == imp.PY_COMPILED:
            try:
                marshal_data = importlib._bootstrap_external._validate_bytecode_header(fp.read())
            except ImportError as exc:
                self.msgout(2, "raise ImportError: " + str(exc), pathname)
                raise
            co = marshal.loads(marshal_data)
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m
    307 
    308     def _add_badmodule(self, name, caller):
    309         if name not in self.badmodules:
    310             self.badmodules[name] = {}
    311         if caller:
    312             self.badmodules[name][caller.__name__] = 1
    313         else:
    314             self.badmodules[name]["-"] = 1
    315 
    316     def _safe_import_hook(self, name, caller, fromlist, level=-1):
    317         # wrapper for self.import_hook() that won't raise ImportError
    318         if name in self.badmodules:
    319             self._add_badmodule(name, caller)
    320             return
    321         try:
    322             self.import_hook(name, caller, level=level)
    323         except ImportError as msg:
    324             self.msg(2, "ImportError:", str(msg))
    325             self._add_badmodule(name, caller)
    326         else:
    327             if fromlist:
    328                 for sub in fromlist:
    329                     if sub in self.badmodules:
    330                         self._add_badmodule(sub, caller)
    331                         continue
    332                     try:
    333                         self.import_hook(name, caller, [sub], level=level)
    334                     except ImportError as msg:
    335                         self.msg(2, "ImportError:", str(msg))
    336                         fullname = name + "." + sub
    337                         self._add_badmodule(fullname, caller)
    338 
    def scan_opcodes(self, co):
        """Yield the store and import events found in code object *co*.

        Yields ``("store", (name,))`` for each STORE_NAME / STORE_GLOBAL,
        ``("absolute_import", (fromlist, name))`` for an IMPORT_NAME whose
        level constant equals 0 and ``("relative_import", (level, fromlist,
        name))`` for any other level.  Only *co* itself is scanned, not the
        code objects among its constants.
        """
        names = co.co_names
        consts = co.co_consts
        # EXTENDED_ARG entries are filtered out so that the two instructions
        # preceding an IMPORT_NAME can be found by plain indexing.
        instructions = [(op, arg)
                        for _, op, arg in dis._unpack_opargs(co.co_code)
                        if op != EXTENDED_ARG]
        for pos, (op, oparg) in enumerate(instructions):
            if op in STORE_OPS:
                yield "store", (names[oparg],)
            elif op == IMPORT_NAME and pos >= 2:
                level_op, level_arg = instructions[pos - 2]
                from_op, from_arg = instructions[pos - 1]
                if from_op == level_op == LOAD_CONST:
                    level = consts[level_arg]
                    fromlist = consts[from_arg]
                    if level == 0: # absolute import
                        yield "absolute_import", (fromlist, names[oparg])
                    else: # relative import
                        yield "relative_import", (level, fromlist, names[oparg])
    358                 continue
    359 
    def scan_code(self, co, m):
        """Process every store and import found in *co* on behalf of *m*.

        Stores are added to m.globalnames, imports are followed through
        _safe_import_hook(), and star imports merge the imported module's
        names into *m*.  Code objects among co.co_consts are scanned
        recursively against the same module.
        """
        for what, args in self.scan_opcodes(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = fromlist is not None and "*" in fromlist
                if fromlist is not None:
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if not have_star:
                    continue
                # We've encountered an "import *". If it is a Python module,
                # the code has already been parsed and we can suck out the
                # global names.
                mm = None
                if m.__path__:
                    # At this point we don't know whether 'name' is a
                    # submodule of 'm' or a global module. Let's just try
                    # the full name first.
                    mm = self.modules.get(m.__name__ + "." + name)
                if mm is None:
                    mm = self.modules.get(name)
                if mm is None:
                    m.starimports[name] = 1
                    continue
                m.globalnames.update(mm.globalnames)
                m.starimports.update(mm.starimports)
                # No code was attached to the module, so its names are
                # unknown.
                if mm.__code__ is None:
                    m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # "from . import x": resolve the package first and then
                    # import from it by its absolute name.
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        code_type = type(co)
        for c in co.co_consts:
            if isinstance(c, code_type):
                self.scan_code(c, m)
    408 
    def load_package(self, fqname, pathname):
        """Record the package in directory *pathname* and load its __init__.

        The name is first translated through replacePackageMap, and any
        paths registered in packagePathMap are appended to the package's
        __path__.  Returns the package's module record.
        """
        self.msgin(2, "load_package", fqname, pathname)
        fqname = replacePackageMap.get(fqname) or fqname
        m = self.add_module(fqname)
        m.__file__ = pathname
        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = [pathname] + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()
    429 
    430     def add_module(self, fqname):
    431         if fqname in self.modules:
    432             return self.modules[fqname]
    433         self.modules[fqname] = m = Module(fqname)
    434         return m
    435 
    436     def find_module(self, name, path, parent=None):
    437         if parent is not None:
    438             # assert path is not None
    439             fullname = parent.__name__+'.'+name
    440         else:
    441             fullname = name
    442         if fullname in self.excludes:
    443             self.msgout(3, "find_module -> Excluded", fullname)
    444             raise ImportError(name)
    445 
    446         if path is None:
    447             if name in sys.builtin_module_names:
    448                 return (None, None, ("", "", imp.C_BUILTIN))
    449 
    450             path = self.path
    451         return imp.find_module(name, path)
    452 
    453     def report(self):
    454         """Print a report to stdout, listing the found modules with their
    455         paths, as well as modules that are missing, or seem to be missing.
    456         """
    457         print()
    458         print("  %-25s %s" % ("Name", "File"))
    459         print("  %-25s %s" % ("----", "----"))
    460         # Print modules found
    461         keys = sorted(self.modules.keys())
    462         for key in keys:
    463             m = self.modules[key]
    464             if m.__path__:
    465                 print("P", end=' ')
    466             else:
    467                 print("m", end=' ')
    468             print("%-25s" % key, m.__file__ or "")
    469 
    470         # Print missing modules
    471         missing, maybe = self.any_missing_maybe()
    472         if missing:
    473             print()
    474             print("Missing modules:")
    475             for name in missing:
    476                 mods = sorted(self.badmodules[name].keys())
    477                 print("?", name, "imported from", ', '.join(mods))
    478         # Print modules that may be missing, but then again, maybe not...
    479         if maybe:
    480             print()
    481             print("Submodules that appear to be missing, but could also be", end=' ')
    482             print("global names in the parent package:")
    483             for name in maybe:
    484                 mods = sorted(self.badmodules[name].keys())
    485                 print("?", name, "imported from", ', '.join(mods))
    486 
    487     def any_missing(self):
    488         """Return a list of modules that appear to be missing. Use
    489         any_missing_maybe() if you want to know which modules are
    490         certain to be missing, and which *may* be missing.
    491         """
    492         missing, maybe = self.any_missing_maybe()
    493         return missing + maybe
    494 
    495     def any_missing_maybe(self):
    496         """Return two lists, one with modules that are certainly missing
    497         and one with modules that *may* be missing. The latter names could
    498         either be submodules *or* just global names in the package.
    499 
    500         The reason it can't always be determined is that it's impossible to
    501         tell which names are imported when "from module import *" is done
    502         with an extension module, short of actually importing it.
    503         """
    504         missing = []
    505         maybe = []
    506         for name in self.badmodules:
    507             if name in self.excludes:
    508                 continue
    509             i = name.rfind(".")
    510             if i < 0:
    511                 missing.append(name)
    512                 continue
    513             subname = name[i+1:]
    514             pkgname = name[:i]
    515             pkg = self.modules.get(pkgname)
    516             if pkg is not None:
    517                 if pkgname in self.badmodules[name]:
    518                     # The package tried to import this module itself and
    519                     # failed. It's definitely missing.
    520                     missing.append(name)
    521                 elif subname in pkg.globalnames:
    522                     # It's a global in the package: definitely not missing.
    523                     pass
    524                 elif pkg.starimports:
    525                     # It could be missing, but the package did an "import *"
    526                     # from a non-Python module, so we simply can't be sure.
    527                     maybe.append(name)
    528                 else:
    529                     # It's not a global in the package, the package didn't
    530                     # do funny star imports, it's very likely to be missing.
    531                     # The symbol could be inserted into the package from the
    532                     # outside, but since that's not good style we simply list
    533                     # it missing.
    534                     missing.append(name)
    535             else:
    536                 missing.append(name)
    537         missing.sort()
    538         maybe.sort()
    539         return missing, maybe
    540 
    541     def replace_paths_in_code(self, co):
    542         new_filename = original_filename = os.path.normpath(co.co_filename)
    543         for f, r in self.replace_paths:
    544             if original_filename.startswith(f):
    545                 new_filename = r + original_filename[len(f):]
    546                 break
    547 
    548         if self.debug and original_filename not in self.processed_paths:
    549             if new_filename != original_filename:
    550                 self.msgout(2, "co_filename %r changed to %r" \
    551                                     % (original_filename,new_filename,))
    552             else:
    553                 self.msgout(2, "co_filename %r remains unchanged" \
    554                                     % (original_filename,))
    555             self.processed_paths.append(original_filename)
    556 
    557         consts = list(co.co_consts)
    558         for i in range(len(consts)):
    559             if isinstance(consts[i], type(co)):
    560                 consts[i] = self.replace_paths_in_code(consts[i])
    561 
    562         return types.CodeType(co.co_argcount, co.co_kwonlyargcount,
    563                               co.co_nlocals, co.co_stacksize, co.co_flags,
    564                               co.co_code, tuple(consts), co.co_names,
    565                               co.co_varnames, new_filename, co.co_name,
    566                               co.co_firstlineno, co.co_lnotab, co.co_freevars,
    567                               co.co_cellvars)
    568 
    569 
    570 def test():
    571     # Parse command line
    572     import getopt
    573     try:
    574         opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    575     except getopt.error as msg:
    576         print(msg)
    577         return
    578 
    579     # Process options
    580     debug = 1
    581     domods = 0
    582     addpath = []
    583     exclude = []
    584     for o, a in opts:
    585         if o == '-d':
    586             debug = debug + 1
    587         if o == '-m':
    588             domods = 1
    589         if o == '-p':
    590             addpath = addpath + a.split(os.pathsep)
    591         if o == '-q':
    592             debug = 0
    593         if o == '-x':
    594             exclude.append(a)
    595 
    596     # Provide default arguments
    597     if not args:
    598         script = "hello.py"
    599     else:
    600         script = args[0]
    601 
    602     # Set the path based on sys.path and the script directory
    603     path = sys.path[:]
    604     path[0] = os.path.dirname(script)
    605     path = addpath + path
    606     if debug > 1:
    607         print("path:")
    608         for item in path:
    609             print("   ", repr(item))
    610 
    611     # Create the module finder and turn its crank
    612     mf = ModuleFinder(path, debug, exclude)
    613     for arg in args[1:]:
    614         if arg == '-m':
    615             domods = 1
    616             continue
    617         if domods:
    618             if arg[-2:] == '.*':
    619                 mf.import_hook(arg[:-2], None, ["*"])
    620             else:
    621                 mf.import_hook(arg)
    622         else:
    623             mf.load_file(arg)
    624     mf.run_script(script)
    625     mf.report()
    626     return mf  # for -i debugging
    627 
    628 
if __name__ == '__main__':
    # Run the command-line driver.  The finder it returns is kept in the
    # module-level name ``mf`` so it can be inspected afterwards (test()
    # returns it "for -i debugging").  Ctrl-C ends the run with a short
    # notice instead of a traceback.
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")
    634