Home | History | Annotate | Download | only in Lib
      1 """Find modules used by a script, using introspection."""
      2 
      3 from __future__ import generators
      4 import dis
      5 import imp
      6 import marshal
      7 import os
      8 import sys
      9 import types
     10 import struct
     11 
     12 if hasattr(sys.__stdout__, "newlines"):
     13     READ_MODE = "U"  # universal line endings
     14 else:
     15     # Python < 2.3 compatibility, no longer strictly required
     16     READ_MODE = "r"
     17 
     18 LOAD_CONST = dis.opmap['LOAD_CONST']
     19 IMPORT_NAME = dis.opmap['IMPORT_NAME']
     20 STORE_NAME = dis.opmap['STORE_NAME']
     21 STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
     22 STORE_OPS = STORE_NAME, STORE_GLOBAL
     23 HAVE_ARGUMENT = dis.HAVE_ARGUMENT
     24 EXTENDED_ARG = dis.EXTENDED_ARG
     25 
     26 def _unpack_opargs(code):
     27     # enumerate() is not an option, since we sometimes process
     28     # multiple elements on a single pass through the loop
     29     extended_arg = 0
     30     n = len(code)
     31     i = 0
     32     while i < n:
     33         op = ord(code[i])
     34         offset = i
     35         i = i+1
     36         arg = None
     37         if op >= HAVE_ARGUMENT:
     38             arg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
     39             extended_arg = 0
     40             i = i+2
     41             if op == EXTENDED_ARG:
     42                 extended_arg = arg*65536
     43         yield (offset, op, arg)
     44 
# Modulefinder does a good job at simulating Python's import mechanism, but
# it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and it will be honored.
     49 
# Note this is a mapping to lists of paths.
     51 packagePathMap = {}
     52 
     53 # A Public interface
     54 def AddPackagePath(packagename, path):
     55     paths = packagePathMap.get(packagename, [])
     56     paths.append(path)
     57     packagePathMap[packagename] = paths
     58 
     59 replacePackageMap = {}
     60 
     61 # This ReplacePackage mechanism allows modulefinder to work around the
     62 # way the _xmlplus package injects itself under the name "xml" into
     63 # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
     64 # before running ModuleFinder.
     65 
     66 def ReplacePackage(oldname, newname):
     67     replacePackageMap[oldname] = newname
     68 
     69 
     70 class Module:
     71 
     72     def __init__(self, name, file=None, path=None):
     73         self.__name__ = name
     74         self.__file__ = file
     75         self.__path__ = path
     76         self.__code__ = None
     77         # The set of global names that are assigned to in the module.
     78         # This includes those names imported through starimports of
     79         # Python modules.
     80         self.globalnames = {}
     81         # The set of starimports this module did that could not be
     82         # resolved, ie. a starimport from a non-Python module.
     83         self.starimports = {}
     84 
     85     def __repr__(self):
     86         s = "Module(%r" % (self.__name__,)
     87         if self.__file__ is not None:
     88             s = s + ", %r" % (self.__file__,)
     89         if self.__path__ is not None:
     90             s = s + ", %r" % (self.__path__,)
     91         s = s + ")"
     92         return s
     93 
     94 class ModuleFinder:
     95 
     96     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
     97         if path is None:
     98             path = sys.path
     99         self.path = path
    100         self.modules = {}
    101         self.badmodules = {}
    102         self.debug = debug
    103         self.indent = 0
    104         self.excludes = excludes
    105         self.replace_paths = replace_paths
    106         self.processed_paths = []   # Used in debugging only
    107 
    108     def msg(self, level, str, *args):
    109         if level <= self.debug:
    110             for i in range(self.indent):
    111                 print "   ",
    112             print str,
    113             for arg in args:
    114                 print repr(arg),
    115             print
    116 
    117     def msgin(self, *args):
    118         level = args[0]
    119         if level <= self.debug:
    120             self.indent = self.indent + 1
    121             self.msg(*args)
    122 
    123     def msgout(self, *args):
    124         level = args[0]
    125         if level <= self.debug:
    126             self.indent = self.indent - 1
    127             self.msg(*args)
    128 
    def run_script(self, pathname):
        """Load the source file *pathname* as the module '__main__'."""
        self.msg(2, "run_script", pathname)
        file_info = ("", "r", imp.PY_SOURCE)
        with open(pathname, READ_MODE) as fp:
            self.load_module('__main__', fp, pathname, file_info)
    134 
    def load_file(self, pathname):
        """Load the source file *pathname* as a module.

        The module name is the file's base name with its extension
        stripped.
        """
        basename = os.path.basename(pathname)
        modname, ext = os.path.splitext(basename)
        file_info = (ext, "r", imp.PY_SOURCE)
        with open(pathname, READ_MODE) as fp:
            self.load_module(modname, fp, pathname, file_info)
    141 
    142     def import_hook(self, name, caller=None, fromlist=None, level=-1):
    143         self.msg(3, "import_hook", name, caller, fromlist, level)
    144         parent = self.determine_parent(caller, level=level)
    145         q, tail = self.find_head_package(parent, name)
    146         m = self.load_tail(q, tail)
    147         if not fromlist:
    148             return q
    149         if m.__path__:
    150             self.ensure_fromlist(m, fromlist)
    151         return None
    152 
    153     def determine_parent(self, caller, level=-1):
    154         self.msgin(4, "determine_parent", caller, level)
    155         if not caller or level == 0:
    156             self.msgout(4, "determine_parent -> None")
    157             return None
    158         pname = caller.__name__
    159         if level >= 1: # relative import
    160             if caller.__path__:
    161                 level -= 1
    162             if level == 0:
    163                 parent = self.modules[pname]
    164                 assert parent is caller
    165                 self.msgout(4, "determine_parent ->", parent)
    166                 return parent
    167             if pname.count(".") < level:
    168                 raise ImportError, "relative importpath too deep"
    169             pname = ".".join(pname.split(".")[:-level])
    170             parent = self.modules[pname]
    171             self.msgout(4, "determine_parent ->", parent)
    172             return parent
    173         if caller.__path__:
    174             parent = self.modules[pname]
    175             assert caller is parent
    176             self.msgout(4, "determine_parent ->", parent)
    177             return parent
    178         if '.' in pname:
    179             i = pname.rfind('.')
    180             pname = pname[:i]
    181             parent = self.modules[pname]
    182             assert parent.__name__ == pname
    183             self.msgout(4, "determine_parent ->", parent)
    184             return parent
    185         self.msgout(4, "determine_parent -> None")
    186         return None
    187 
    188     def find_head_package(self, parent, name):
    189         self.msgin(4, "find_head_package", parent, name)
    190         if '.' in name:
    191             i = name.find('.')
    192             head = name[:i]
    193             tail = name[i+1:]
    194         else:
    195             head = name
    196             tail = ""
    197         if parent:
    198             qname = "%s.%s" % (parent.__name__, head)
    199         else:
    200             qname = head
    201         q = self.import_module(head, qname, parent)
    202         if q:
    203             self.msgout(4, "find_head_package ->", (q, tail))
    204             return q, tail
    205         if parent:
    206             qname = head
    207             parent = None
    208             q = self.import_module(head, qname, parent)
    209             if q:
    210                 self.msgout(4, "find_head_package ->", (q, tail))
    211                 return q, tail
    212         self.msgout(4, "raise ImportError: No module named", qname)
    213         raise ImportError, "No module named " + qname
    214 
    215     def load_tail(self, q, tail):
    216         self.msgin(4, "load_tail", q, tail)
    217         m = q
    218         while tail:
    219             i = tail.find('.')
    220             if i < 0: i = len(tail)
    221             head, tail = tail[:i], tail[i+1:]
    222             mname = "%s.%s" % (m.__name__, head)
    223             m = self.import_module(head, mname, m)
    224             if not m:
    225                 self.msgout(4, "raise ImportError: No module named", mname)
    226                 raise ImportError, "No module named " + mname
    227         self.msgout(4, "load_tail ->", m)
    228         return m
    229 
    230     def ensure_fromlist(self, m, fromlist, recursive=0):
    231         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
    232         for sub in fromlist:
    233             if sub == "*":
    234                 if not recursive:
    235                     all = self.find_all_submodules(m)
    236                     if all:
    237                         self.ensure_fromlist(m, all, 1)
    238             elif not hasattr(m, sub):
    239                 subname = "%s.%s" % (m.__name__, sub)
    240                 submod = self.import_module(sub, subname, m)
    241                 if not submod:
    242                     raise ImportError, "No module named " + subname
    243 
    244     def find_all_submodules(self, m):
    245         if not m.__path__:
    246             return
    247         modules = {}
    248         # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
    249         # But we must also collect Python extension modules - although
    250         # we cannot separate normal dlls from Python extensions.
    251         suffixes = []
    252         for triple in imp.get_suffixes():
    253             suffixes.append(triple[0])
    254         for dir in m.__path__:
    255             try:
    256                 names = os.listdir(dir)
    257             except os.error:
    258                 self.msg(2, "can't list directory", dir)
    259                 continue
    260             for name in names:
    261                 mod = None
    262                 for suff in suffixes:
    263                     n = len(suff)
    264                     if name[-n:] == suff:
    265                         mod = name[:-n]
    266                         break
    267                 if mod and mod != "__init__":
    268                     modules[mod] = mod
    269         return modules.keys()
    270 
    271     def import_module(self, partname, fqname, parent):
    272         self.msgin(3, "import_module", partname, fqname, parent)
    273         try:
    274             m = self.modules[fqname]
    275         except KeyError:
    276             pass
    277         else:
    278             self.msgout(3, "import_module ->", m)
    279             return m
    280         if fqname in self.badmodules:
    281             self.msgout(3, "import_module -> None")
    282             return None
    283         if parent and parent.__path__ is None:
    284             self.msgout(3, "import_module -> None")
    285             return None
    286         try:
    287             fp, pathname, stuff = self.find_module(partname,
    288                                                    parent and parent.__path__, parent)
    289         except ImportError:
    290             self.msgout(3, "import_module ->", None)
    291             return None
    292         try:
    293             m = self.load_module(fqname, fp, pathname, stuff)
    294         finally:
    295             if fp: fp.close()
    296         if parent:
    297             setattr(parent, partname, m)
    298         self.msgout(3, "import_module ->", m)
    299         return m
    300 
    301     def load_module(self, fqname, fp, pathname, file_info):
    302         suffix, mode, type = file_info
    303         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
    304         if type == imp.PKG_DIRECTORY:
    305             m = self.load_package(fqname, pathname)
    306             self.msgout(2, "load_module ->", m)
    307             return m
    308         if type == imp.PY_SOURCE:
    309             co = compile(fp.read()+'\n', pathname, 'exec')
    310         elif type == imp.PY_COMPILED:
    311             if fp.read(4) != imp.get_magic():
    312                 self.msgout(2, "raise ImportError: Bad magic number", pathname)
    313                 raise ImportError, "Bad magic number in %s" % pathname
    314             fp.read(4)
    315             co = marshal.load(fp)
    316         else:
    317             co = None
    318         m = self.add_module(fqname)
    319         m.__file__ = pathname
    320         if co:
    321             if self.replace_paths:
    322                 co = self.replace_paths_in_code(co)
    323             m.__code__ = co
    324             self.scan_code(co, m)
    325         self.msgout(2, "load_module ->", m)
    326         return m
    327 
    328     def _add_badmodule(self, name, caller):
    329         if name not in self.badmodules:
    330             self.badmodules[name] = {}
    331         if caller:
    332             self.badmodules[name][caller.__name__] = 1
    333         else:
    334             self.badmodules[name]["-"] = 1
    335 
    336     def _safe_import_hook(self, name, caller, fromlist, level=-1):
    337         # wrapper for self.import_hook() that won't raise ImportError
    338         if name in self.badmodules:
    339             self._add_badmodule(name, caller)
    340             return
    341         try:
    342             self.import_hook(name, caller, level=level)
    343         except ImportError, msg:
    344             self.msg(2, "ImportError:", str(msg))
    345             self._add_badmodule(name, caller)
    346         else:
    347             if fromlist:
    348                 for sub in fromlist:
    349                     if sub in self.badmodules:
    350                         self._add_badmodule(sub, caller)
    351                         continue
    352                     try:
    353                         self.import_hook(name, caller, [sub], level=level)
    354                     except ImportError, msg:
    355                         self.msg(2, "ImportError:", str(msg))
    356                         fullname = name + "." + sub
    357                         self._add_badmodule(fullname, caller)
    358 
    def scan_opcodes(self, co,
                     unpack = struct.unpack):
        """Yield the 'interesting' opcode combinations found in *co*.

        Version for Python 2.4 and older.  Yields ("store", (name,)) for
        every STORE_NAME/STORE_GLOBAL and ("import", (fromlist, name))
        for every IMPORT_NAME directly preceded by a LOAD_CONST, whose
        constant is taken as the from-list.  EXTENDED_ARG prefixes are
        dropped before the pairing.  *unpack* is unused and kept only for
        interface compatibility.
        """
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        opargs = [(op, arg) for _, op, arg in _unpack_opargs(code)
                  if op != EXTENDED_ARG]
        for i, (op, oparg) in enumerate(opargs):
            # Test the current opcode; the previous spelling referenced an
            # undefined name and raised NameError on the first instruction.
            if op in STORE_OPS:
                yield "store", (names[oparg],)
                continue
            if (op == IMPORT_NAME and i >= 1
                    and opargs[i-1][0] == LOAD_CONST):
                fromlist = consts[opargs[i-1][1]]
                yield "import", (fromlist, names[oparg])
                continue
    376                 continue
    377 
    def scan_opcodes_25(self, co):
        """Yield the 'interesting' opcode combinations found in *co*.

        Yields ("store", (name,)) for every STORE_NAME/STORE_GLOBAL.  For
        every IMPORT_NAME directly preceded by two LOAD_CONSTs (import
        level, then from-list) it yields one of:
          ("import", (fromlist, name))                  level == -1
          ("absolute_import", (fromlist, name))         level == 0
          ("relative_import", (level, fromlist, name))  otherwise
        EXTENDED_ARG prefixes are dropped before the pairing.
        """
        names = co.co_names
        consts = co.co_consts
        opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code)
                  if op != EXTENDED_ARG]
        for i, (op, oparg) in enumerate(opargs):
            if op in STORE_OPS:
                yield "store", (names[oparg],)
            elif op == IMPORT_NAME and i >= 2:
                (level_op, level_idx), (from_op, from_idx) = opargs[i-2:i]
                if not (from_op == level_op == LOAD_CONST):
                    continue
                level = consts[level_idx]
                fromlist = consts[from_idx]
                target = names[oparg]
                if level == -1:  # normal import
                    yield "import", (fromlist, target)
                elif level == 0:  # absolute import
                    yield "absolute_import", (fromlist, target)
                else:  # relative import
                    yield "relative_import", (level, fromlist, target)
    400 
    401     def scan_code(self, co, m):
    402         code = co.co_code
    403         if sys.version_info >= (2, 5):
    404             scanner = self.scan_opcodes_25
    405         else:
    406             scanner = self.scan_opcodes
    407         for what, args in scanner(co):
    408             if what == "store":
    409                 name, = args
    410                 m.globalnames[name] = 1
    411             elif what in ("import", "absolute_import"):
    412                 fromlist, name = args
    413                 have_star = 0
    414                 if fromlist is not None:
    415                     if "*" in fromlist:
    416                         have_star = 1
    417                     fromlist = [f for f in fromlist if f != "*"]
    418                 if what == "absolute_import": level = 0
    419                 else: level = -1
    420                 self._safe_import_hook(name, m, fromlist, level=level)
    421                 if have_star:
    422                     # We've encountered an "import *". If it is a Python module,
    423                     # the code has already been parsed and we can suck out the
    424                     # global names.
    425                     mm = None
    426                     if m.__path__:
    427                         # At this point we don't know whether 'name' is a
    428                         # submodule of 'm' or a global module. Let's just try
    429                         # the full name first.
    430                         mm = self.modules.get(m.__name__ + "." + name)
    431                     if mm is None:
    432                         mm = self.modules.get(name)
    433                     if mm is not None:
    434                         m.globalnames.update(mm.globalnames)
    435                         m.starimports.update(mm.starimports)
    436                         if mm.__code__ is None:
    437                             m.starimports[name] = 1
    438                     else:
    439                         m.starimports[name] = 1
    440             elif what == "relative_import":
    441                 level, fromlist, name = args
    442                 if name:
    443                     self._safe_import_hook(name, m, fromlist, level=level)
    444                 else:
    445                     parent = self.determine_parent(m, level=level)
    446                     self._safe_import_hook(parent.__name__, None, fromlist, level=0)
    447             else:
    448                 # We don't expect anything else from the generator.
    449                 raise RuntimeError(what)
    450 
    451         for c in co.co_consts:
    452             if isinstance(c, type(co)):
    453                 self.scan_code(c, m)
    454 
    def load_package(self, fqname, pathname):
        """Register the package directory *pathname* and load its __init__.

        If a replacement name was registered for *fqname* in
        replacePackageMap, the package is recorded under that name.  Its
        __path__ is *pathname* followed by any extra paths registered for
        the package in packagePathMap.  Returns the module record.  An
        ImportError raised while locating "__init__" propagates.

        The file opened for "__init__" is closed even when loading it
        raises, matching what import_module() does.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = [pathname] + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()
    473 
    474     def add_module(self, fqname):
    475         if fqname in self.modules:
    476             return self.modules[fqname]
    477         self.modules[fqname] = m = Module(fqname)
    478         return m
    479 
    480     def find_module(self, name, path, parent=None):
    481         if parent is not None:
    482             # assert path is not None
    483             fullname = parent.__name__+'.'+name
    484         else:
    485             fullname = name
    486         if fullname in self.excludes:
    487             self.msgout(3, "find_module -> Excluded", fullname)
    488             raise ImportError, name
    489 
    490         if path is None:
    491             if name in sys.builtin_module_names:
    492                 return (None, None, ("", "", imp.C_BUILTIN))
    493 
    494             path = self.path
    495         return imp.find_module(name, path)
    496 
    497     def report(self):
    498         """Print a report to stdout, listing the found modules with their
    499         paths, as well as modules that are missing, or seem to be missing.
    500         """
    501         print
    502         print "  %-25s %s" % ("Name", "File")
    503         print "  %-25s %s" % ("----", "----")
    504         # Print modules found
    505         keys = self.modules.keys()
    506         keys.sort()
    507         for key in keys:
    508             m = self.modules[key]
    509             if m.__path__:
    510                 print "P",
    511             else:
    512                 print "m",
    513             print "%-25s" % key, m.__file__ or ""
    514 
    515         # Print missing modules
    516         missing, maybe = self.any_missing_maybe()
    517         if missing:
    518             print
    519             print "Missing modules:"
    520             for name in missing:
    521                 mods = self.badmodules[name].keys()
    522                 mods.sort()
    523                 print "?", name, "imported from", ', '.join(mods)
    524         # Print modules that may be missing, but then again, maybe not...
    525         if maybe:
    526             print
    527             print "Submodules that appear to be missing, but could also be",
    528             print "global names in the parent package:"
    529             for name in maybe:
    530                 mods = self.badmodules[name].keys()
    531                 mods.sort()
    532                 print "?", name, "imported from", ', '.join(mods)
    533 
    534     def any_missing(self):
    535         """Return a list of modules that appear to be missing. Use
    536         any_missing_maybe() if you want to know which modules are
    537         certain to be missing, and which *may* be missing.
    538         """
    539         missing, maybe = self.any_missing_maybe()
    540         return missing + maybe
    541 
    542     def any_missing_maybe(self):
    543         """Return two lists, one with modules that are certainly missing
    544         and one with modules that *may* be missing. The latter names could
    545         either be submodules *or* just global names in the package.
    546 
    547         The reason it can't always be determined is that it's impossible to
    548         tell which names are imported when "from module import *" is done
    549         with an extension module, short of actually importing it.
    550         """
    551         missing = []
    552         maybe = []
    553         for name in self.badmodules:
    554             if name in self.excludes:
    555                 continue
    556             i = name.rfind(".")
    557             if i < 0:
    558                 missing.append(name)
    559                 continue
    560             subname = name[i+1:]
    561             pkgname = name[:i]
    562             pkg = self.modules.get(pkgname)
    563             if pkg is not None:
    564                 if pkgname in self.badmodules[name]:
    565                     # The package tried to import this module itself and
    566                     # failed. It's definitely missing.
    567                     missing.append(name)
    568                 elif subname in pkg.globalnames:
    569                     # It's a global in the package: definitely not missing.
    570                     pass
    571                 elif pkg.starimports:
    572                     # It could be missing, but the package did an "import *"
    573                     # from a non-Python module, so we simply can't be sure.
    574                     maybe.append(name)
    575                 else:
    576                     # It's not a global in the package, the package didn't
    577                     # do funny star imports, it's very likely to be missing.
    578                     # The symbol could be inserted into the package from the
    579                     # outside, but since that's not good style we simply list
    580                     # it missing.
    581                     missing.append(name)
    582             else:
    583                 missing.append(name)
    584         missing.sort()
    585         maybe.sort()
    586         return missing, maybe
    587 
    588     def replace_paths_in_code(self, co):
    589         new_filename = original_filename = os.path.normpath(co.co_filename)
    590         for f, r in self.replace_paths:
    591             if original_filename.startswith(f):
    592                 new_filename = r + original_filename[len(f):]
    593                 break
    594 
    595         if self.debug and original_filename not in self.processed_paths:
    596             if new_filename != original_filename:
    597                 self.msgout(2, "co_filename %r changed to %r" \
    598                                     % (original_filename,new_filename,))
    599             else:
    600                 self.msgout(2, "co_filename %r remains unchanged" \
    601                                     % (original_filename,))
    602             self.processed_paths.append(original_filename)
    603 
    604         consts = list(co.co_consts)
    605         for i in range(len(consts)):
    606             if isinstance(consts[i], type(co)):
    607                 consts[i] = self.replace_paths_in_code(consts[i])
    608 
    609         return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
    610                          co.co_flags, co.co_code, tuple(consts), co.co_names,
    611                          co.co_varnames, new_filename, co.co_name,
    612                          co.co_firstlineno, co.co_lnotab,
    613                          co.co_freevars, co.co_cellvars)
    614 
    615 
    616 def test():
    617     # Parse command line
    618     import getopt
    619     try:
    620         opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    621     except getopt.error, msg:
    622         print msg
    623         return
    624 
    625     # Process options
    626     debug = 1
    627     domods = 0
    628     addpath = []
    629     exclude = []
    630     for o, a in opts:
    631         if o == '-d':
    632             debug = debug + 1
    633         if o == '-m':
    634             domods = 1
    635         if o == '-p':
    636             addpath = addpath + a.split(os.pathsep)
    637         if o == '-q':
    638             debug = 0
    639         if o == '-x':
    640             exclude.append(a)
    641 
    642     # Provide default arguments
    643     if not args:
    644         script = "hello.py"
    645     else:
    646         script = args[0]
    647 
    648     # Set the path based on sys.path and the script directory
    649     path = sys.path[:]
    650     path[0] = os.path.dirname(script)
    651     path = addpath + path
    652     if debug > 1:
    653         print "path:"
    654         for item in path:
    655             print "   ", repr(item)
    656 
    657     # Create the module finder and turn its crank
    658     mf = ModuleFinder(path, debug, exclude)
    659     for arg in args[1:]:
    660         if arg == '-m':
    661             domods = 1
    662             continue
    663         if domods:
    664             if arg[-2:] == '.*':
    665                 mf.import_hook(arg[:-2], None, ["*"])
    666             else:
    667                 mf.import_hook(arg)
    668         else:
    669             mf.load_file(arg)
    670     mf.run_script(script)
    671     mf.report()
    672     return mf  # for -i debugging
    673 
    674 
# Script entry point: run the command-line driver.
if __name__ == '__main__':
    try:
        # Bind the result at module level so the finder stays available
        # for inspection after the run.
        mf = test()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        print "\n[interrupt]"
    679         print "\n[interrupt]"
    680