Home | History | Annotate | Download | only in Lib
      1 """Find modules used by a script, using introspection."""
      2 # This module should be kept compatible with Python 2.2, see PEP 291.

      3 
      4 from __future__ import generators
      5 import dis
      6 import imp
      7 import marshal
      8 import os
      9 import sys
     10 import types
     11 import struct
     12 
     13 if hasattr(sys.__stdout__, "newlines"):
     14     READ_MODE = "U"  # universal line endings

     15 else:
     16     # remain compatible with Python  < 2.3

     17     READ_MODE = "r"
     18 
     19 LOAD_CONST = chr(dis.opname.index('LOAD_CONST'))
     20 IMPORT_NAME = chr(dis.opname.index('IMPORT_NAME'))
     21 STORE_NAME = chr(dis.opname.index('STORE_NAME'))
     22 STORE_GLOBAL = chr(dis.opname.index('STORE_GLOBAL'))
     23 STORE_OPS = [STORE_NAME, STORE_GLOBAL]
     24 HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT)
     25 
     26 # Modulefinder does a good job at simulating Python's, but it can not

     27 # handle __path__ modifications packages make at runtime.  Therefore there

     28 # is a mechanism whereby you can register extra paths in this map for a

     29 # package, and it will be honored.

     30 
     31 # Note this is a mapping is lists of paths.

     32 packagePathMap = {}
     33 
     34 # A Public interface

     35 def AddPackagePath(packagename, path):
     36     paths = packagePathMap.get(packagename, [])
     37     paths.append(path)
     38     packagePathMap[packagename] = paths
     39 
     40 replacePackageMap = {}
     41 
     42 # This ReplacePackage mechanism allows modulefinder to work around the

     43 # way the _xmlplus package injects itself under the name "xml" into

     44 # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")

     45 # before running ModuleFinder.

     46 
     47 def ReplacePackage(oldname, newname):
     48     replacePackageMap[oldname] = newname
     49 
     50 
     51 class Module:
     52 
     53     def __init__(self, name, file=None, path=None):
     54         self.__name__ = name
     55         self.__file__ = file
     56         self.__path__ = path
     57         self.__code__ = None
     58         # The set of global names that are assigned to in the module.

     59         # This includes those names imported through starimports of

     60         # Python modules.

     61         self.globalnames = {}
     62         # The set of starimports this module did that could not be

     63         # resolved, ie. a starimport from a non-Python module.

     64         self.starimports = {}
     65 
     66     def __repr__(self):
     67         s = "Module(%r" % (self.__name__,)
     68         if self.__file__ is not None:
     69             s = s + ", %r" % (self.__file__,)
     70         if self.__path__ is not None:
     71             s = s + ", %r" % (self.__path__,)
     72         s = s + ")"
     73         return s
     74 
     75 class ModuleFinder:
     76 
     77     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
     78         if path is None:
     79             path = sys.path
     80         self.path = path
     81         self.modules = {}
     82         self.badmodules = {}
     83         self.debug = debug
     84         self.indent = 0
     85         self.excludes = excludes
     86         self.replace_paths = replace_paths
     87         self.processed_paths = []   # Used in debugging only

     88 
     89     def msg(self, level, str, *args):
     90         if level <= self.debug:
     91             for i in range(self.indent):
     92                 print "   ",
     93             print str,
     94             for arg in args:
     95                 print repr(arg),
     96             print
     97 
     98     def msgin(self, *args):
     99         level = args[0]
    100         if level <= self.debug:
    101             self.indent = self.indent + 1
    102             self.msg(*args)
    103 
    104     def msgout(self, *args):
    105         level = args[0]
    106         if level <= self.debug:
    107             self.indent = self.indent - 1
    108             self.msg(*args)
    109 
    def run_script(self, pathname):
        """Load the script at *pathname* as the '__main__' module."""
        self.msg(2, "run_script", pathname)
        file_info = ("", "r", imp.PY_SOURCE)
        with open(pathname, READ_MODE) as script:
            self.load_module('__main__', script, pathname, file_info)
    115 
    def load_file(self, pathname):
        """Load the source file *pathname* as a module.

        The module is named after the file's base name with its
        extension stripped.
        """
        modname, ext = os.path.splitext(os.path.basename(pathname))
        file_info = (ext, "r", imp.PY_SOURCE)
        with open(pathname, READ_MODE) as source:
            self.load_module(modname, source, pathname, file_info)
    122 
    123     def import_hook(self, name, caller=None, fromlist=None, level=-1):
    124         self.msg(3, "import_hook", name, caller, fromlist, level)
    125         parent = self.determine_parent(caller, level=level)
    126         q, tail = self.find_head_package(parent, name)
    127         m = self.load_tail(q, tail)
    128         if not fromlist:
    129             return q
    130         if m.__path__:
    131             self.ensure_fromlist(m, fromlist)
    132         return None
    133 
    134     def determine_parent(self, caller, level=-1):
    135         self.msgin(4, "determine_parent", caller, level)
    136         if not caller or level == 0:
    137             self.msgout(4, "determine_parent -> None")
    138             return None
    139         pname = caller.__name__
    140         if level >= 1: # relative import

    141             if caller.__path__:
    142                 level -= 1
    143             if level == 0:
    144                 parent = self.modules[pname]
    145                 assert parent is caller
    146                 self.msgout(4, "determine_parent ->", parent)
    147                 return parent
    148             if pname.count(".") < level:
    149                 raise ImportError, "relative importpath too deep"
    150             pname = ".".join(pname.split(".")[:-level])
    151             parent = self.modules[pname]
    152             self.msgout(4, "determine_parent ->", parent)
    153             return parent
    154         if caller.__path__:
    155             parent = self.modules[pname]
    156             assert caller is parent
    157             self.msgout(4, "determine_parent ->", parent)
    158             return parent
    159         if '.' in pname:
    160             i = pname.rfind('.')
    161             pname = pname[:i]
    162             parent = self.modules[pname]
    163             assert parent.__name__ == pname
    164             self.msgout(4, "determine_parent ->", parent)
    165             return parent
    166         self.msgout(4, "determine_parent -> None")
    167         return None
    168 
    169     def find_head_package(self, parent, name):
    170         self.msgin(4, "find_head_package", parent, name)
    171         if '.' in name:
    172             i = name.find('.')
    173             head = name[:i]
    174             tail = name[i+1:]
    175         else:
    176             head = name
    177             tail = ""
    178         if parent:
    179             qname = "%s.%s" % (parent.__name__, head)
    180         else:
    181             qname = head
    182         q = self.import_module(head, qname, parent)
    183         if q:
    184             self.msgout(4, "find_head_package ->", (q, tail))
    185             return q, tail
    186         if parent:
    187             qname = head
    188             parent = None
    189             q = self.import_module(head, qname, parent)
    190             if q:
    191                 self.msgout(4, "find_head_package ->", (q, tail))
    192                 return q, tail
    193         self.msgout(4, "raise ImportError: No module named", qname)
    194         raise ImportError, "No module named " + qname
    195 
    196     def load_tail(self, q, tail):
    197         self.msgin(4, "load_tail", q, tail)
    198         m = q
    199         while tail:
    200             i = tail.find('.')
    201             if i < 0: i = len(tail)
    202             head, tail = tail[:i], tail[i+1:]
    203             mname = "%s.%s" % (m.__name__, head)
    204             m = self.import_module(head, mname, m)
    205             if not m:
    206                 self.msgout(4, "raise ImportError: No module named", mname)
    207                 raise ImportError, "No module named " + mname
    208         self.msgout(4, "load_tail ->", m)
    209         return m
    210 
    211     def ensure_fromlist(self, m, fromlist, recursive=0):
    212         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
    213         for sub in fromlist:
    214             if sub == "*":
    215                 if not recursive:
    216                     all = self.find_all_submodules(m)
    217                     if all:
    218                         self.ensure_fromlist(m, all, 1)
    219             elif not hasattr(m, sub):
    220                 subname = "%s.%s" % (m.__name__, sub)
    221                 submod = self.import_module(sub, subname, m)
    222                 if not submod:
    223                     raise ImportError, "No module named " + subname
    224 
    225     def find_all_submodules(self, m):
    226         if not m.__path__:
    227             return
    228         modules = {}
    229         # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].

    230         # But we must also collect Python extension modules - although

    231         # we cannot separate normal dlls from Python extensions.

    232         suffixes = []
    233         for triple in imp.get_suffixes():
    234             suffixes.append(triple[0])
    235         for dir in m.__path__:
    236             try:
    237                 names = os.listdir(dir)
    238             except os.error:
    239                 self.msg(2, "can't list directory", dir)
    240                 continue
    241             for name in names:
    242                 mod = None
    243                 for suff in suffixes:
    244                     n = len(suff)
    245                     if name[-n:] == suff:
    246                         mod = name[:-n]
    247                         break
    248                 if mod and mod != "__init__":
    249                     modules[mod] = mod
    250         return modules.keys()
    251 
    252     def import_module(self, partname, fqname, parent):
    253         self.msgin(3, "import_module", partname, fqname, parent)
    254         try:
    255             m = self.modules[fqname]
    256         except KeyError:
    257             pass
    258         else:
    259             self.msgout(3, "import_module ->", m)
    260             return m
    261         if fqname in self.badmodules:
    262             self.msgout(3, "import_module -> None")
    263             return None
    264         if parent and parent.__path__ is None:
    265             self.msgout(3, "import_module -> None")
    266             return None
    267         try:
    268             fp, pathname, stuff = self.find_module(partname,
    269                                                    parent and parent.__path__, parent)
    270         except ImportError:
    271             self.msgout(3, "import_module ->", None)
    272             return None
    273         try:
    274             m = self.load_module(fqname, fp, pathname, stuff)
    275         finally:
    276             if fp: fp.close()
    277         if parent:
    278             setattr(parent, partname, m)
    279         self.msgout(3, "import_module ->", m)
    280         return m
    281 
    def load_module(self, fqname, fp, pathname, file_info):
        """Register module *fqname* and scan its code for imports.

        *file_info* is a (suffix, mode, type) triple in the style of
        imp.find_module().  Packages are delegated to load_package();
        source files are compiled and compiled files unmarshalled; any
        other type is registered without code.  Raises ImportError for a
        compiled file with the wrong magic number.
        """
        suffix, mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == imp.PKG_DIRECTORY:
            package = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", package)
            return package

        co = None
        if kind == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif kind == imp.PY_COMPILED:
            if fp.read(4) != imp.get_magic():
                self.msgout(2, "raise ImportError: Bad magic number", pathname)
                raise ImportError("Bad magic number in %s" % pathname)
            # Discard the four header bytes that follow the magic number.
            fp.read(4)
            co = marshal.load(fp)

        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m
    308 
    309     def _add_badmodule(self, name, caller):
    310         if name not in self.badmodules:
    311             self.badmodules[name] = {}
    312         if caller:
    313             self.badmodules[name][caller.__name__] = 1
    314         else:
    315             self.badmodules[name]["-"] = 1
    316 
    317     def _safe_import_hook(self, name, caller, fromlist, level=-1):
    318         # wrapper for self.import_hook() that won't raise ImportError

    319         if name in self.badmodules:
    320             self._add_badmodule(name, caller)
    321             return
    322         try:
    323             self.import_hook(name, caller, level=level)
    324         except ImportError, msg:
    325             self.msg(2, "ImportError:", str(msg))
    326             self._add_badmodule(name, caller)
    327         else:
    328             if fromlist:
    329                 for sub in fromlist:
    330                     if sub in self.badmodules:
    331                         self._add_badmodule(sub, caller)
    332                         continue
    333                     try:
    334                         self.import_hook(name, caller, [sub], level=level)
    335                     except ImportError, msg:
    336                         self.msg(2, "ImportError:", str(msg))
    337                         fullname = name + "." + sub
    338                         self._add_badmodule(fullname, caller)
    339 
    def scan_opcodes(self, co,
                     unpack = struct.unpack):
        """Yield the 'interesting' opcode combinations found in *co*.

        Version for Python 2.4 and older.  Yields ("store", (name,)) for
        global/name stores and ("import", (fromlist, name)) for a
        LOAD_CONST immediately followed by IMPORT_NAME.
        """
        remaining = co.co_code
        names = co.co_names
        consts = co.co_consts
        while remaining:
            op = remaining[0]
            if op in STORE_OPS:
                oparg, = unpack('<H', remaining[1:3])
                yield "store", (names[oparg],)
                step = 3
            elif op == LOAD_CONST and remaining[3] == IMPORT_NAME:
                oparg_1, oparg_2 = unpack('<xHxH', remaining[:6])
                yield "import", (consts[oparg_1], names[oparg_2])
                step = 6
            elif op >= HAVE_ARGUMENT:
                # Any other opcode with a two-byte argument.
                step = 3
            else:
                step = 1
            remaining = remaining[step:]
    363 
    def scan_opcodes_25(self, co,
                     unpack = struct.unpack):
        """Yield the 'interesting' opcode combinations found in *co*.

        Python 2.5 version (has absolute and relative imports).  Besides
        ("store", (name,)), a LOAD_CONST, LOAD_CONST, IMPORT_NAME triple
        yields "import" (level -1), "absolute_import" (level 0) or
        "relative_import" (any other level, which is then included in the
        yielded arguments).
        """
        remaining = co.co_code
        names = co.co_names
        consts = co.co_consts
        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
        while remaining:
            op = remaining[0]
            if op in STORE_OPS:
                oparg, = unpack('<H', remaining[1:3])
                yield "store", (names[oparg],)
                step = 3
            elif remaining[:9:3] == LOAD_LOAD_AND_IMPORT:
                # Every third byte of the next nine is one of the opcodes.
                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', remaining[:9])
                level = consts[oparg_1]
                fromlist = consts[oparg_2]
                modname = names[oparg_3]
                if level == -1: # normal import
                    yield "import", (fromlist, modname)
                elif level == 0: # absolute import
                    yield "absolute_import", (fromlist, modname)
                else: # relative import
                    yield "relative_import", (level, fromlist, modname)
                step = 9
            elif op >= HAVE_ARGUMENT:
                step = 3
            else:
                step = 1
            remaining = remaining[step:]
    394 
    def scan_code(self, co, m):
        """Scan code object *co*, recording stores and imports on module *m*.

        Global stores are added to m.globalnames, imports are followed
        through _safe_import_hook(), and code objects nested in
        co.co_consts are scanned recursively.
        """
        if sys.version_info >= (2, 5):
            scanner = self.scan_opcodes_25
        else:
            scanner = self.scan_opcodes
        for what, args in scanner(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what in ("import", "absolute_import"):
                fromlist, name = args
                have_star = fromlist is not None and "*" in fromlist
                if fromlist is not None:
                    fromlist = [f for f in fromlist if f != "*"]
                if what == "absolute_import":
                    level = 0
                else:
                    level = -1
                self._safe_import_hook(name, m, fromlist, level=level)
                if not have_star:
                    continue
                # We've encountered an "import *". If it is a Python module,
                # the code has already been parsed and we can suck out the
                # global names.
                source = None
                if m.__path__:
                    # At this point we don't know whether 'name' is a
                    # submodule of 'm' or a global module. Let's just try
                    # the full name first.
                    source = self.modules.get(m.__name__ + "." + name)
                if source is None:
                    source = self.modules.get(name)
                if source is None:
                    m.starimports[name] = 1
                else:
                    m.globalnames.update(source.globalnames)
                    m.starimports.update(source.starimports)
                    # No code was scanned for it, so its names are unknown.
                    if source.__code__ is None:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # "from . import x": import from the parent package.
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        code_type = type(co)
        for const in co.co_consts:
            if isinstance(const, code_type):
                self.scan_code(const, m)
    448 
    def load_package(self, fqname, pathname):
        """Register the package in directory *pathname* and load its __init__.

        *fqname* is first translated through replacePackageMap, and any
        extra directories registered in packagePathMap are appended to the
        package's __path__.  Returns the package's module object; raises
        ImportError when no __init__ module can be found.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = [pathname] + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
        finally:
            # Close the file even when loading fails (e.g. a SyntaxError
            # while compiling __init__), as import_module() does.
            if fp:
                fp.close()
        self.msgout(2, "load_package ->", m)
        return m
    467 
    468     def add_module(self, fqname):
    469         if fqname in self.modules:
    470             return self.modules[fqname]
    471         self.modules[fqname] = m = Module(fqname)
    472         return m
    473 
    474     def find_module(self, name, path, parent=None):
    475         if parent is not None:
    476             # assert path is not None

    477             fullname = parent.__name__+'.'+name
    478         else:
    479             fullname = name
    480         if fullname in self.excludes:
    481             self.msgout(3, "find_module -> Excluded", fullname)
    482             raise ImportError, name
    483 
    484         if path is None:
    485             if name in sys.builtin_module_names:
    486                 return (None, None, ("", "", imp.C_BUILTIN))
    487 
    488             path = self.path
    489         return imp.find_module(name, path)
    490 
    491     def report(self):
    492         """Print a report to stdout, listing the found modules with their
    493         paths, as well as modules that are missing, or seem to be missing.
    494         """
    495         print
    496         print "  %-25s %s" % ("Name", "File")
    497         print "  %-25s %s" % ("----", "----")
    498         # Print modules found

    499         keys = self.modules.keys()
    500         keys.sort()
    501         for key in keys:
    502             m = self.modules[key]
    503             if m.__path__:
    504                 print "P",
    505             else:
    506                 print "m",
    507             print "%-25s" % key, m.__file__ or ""
    508 
    509         # Print missing modules

    510         missing, maybe = self.any_missing_maybe()
    511         if missing:
    512             print
    513             print "Missing modules:"
    514             for name in missing:
    515                 mods = self.badmodules[name].keys()
    516                 mods.sort()
    517                 print "?", name, "imported from", ', '.join(mods)
    518         # Print modules that may be missing, but then again, maybe not...

    519         if maybe:
    520             print
    521             print "Submodules that appear to be missing, but could also be",
    522             print "global names in the parent package:"
    523             for name in maybe:
    524                 mods = self.badmodules[name].keys()
    525                 mods.sort()
    526                 print "?", name, "imported from", ', '.join(mods)
    527 
    528     def any_missing(self):
    529         """Return a list of modules that appear to be missing. Use
    530         any_missing_maybe() if you want to know which modules are
    531         certain to be missing, and which *may* be missing.
    532         """
    533         missing, maybe = self.any_missing_maybe()
    534         return missing + maybe
    535 
    536     def any_missing_maybe(self):
    537         """Return two lists, one with modules that are certainly missing
    538         and one with modules that *may* be missing. The latter names could
    539         either be submodules *or* just global names in the package.
    540 
    541         The reason it can't always be determined is that it's impossible to
    542         tell which names are imported when "from module import *" is done
    543         with an extension module, short of actually importing it.
    544         """
    545         missing = []
    546         maybe = []
    547         for name in self.badmodules:
    548             if name in self.excludes:
    549                 continue
    550             i = name.rfind(".")
    551             if i < 0:
    552                 missing.append(name)
    553                 continue
    554             subname = name[i+1:]
    555             pkgname = name[:i]
    556             pkg = self.modules.get(pkgname)
    557             if pkg is not None:
    558                 if pkgname in self.badmodules[name]:
    559                     # The package tried to import this module itself and

    560                     # failed. It's definitely missing.

    561                     missing.append(name)
    562                 elif subname in pkg.globalnames:
    563                     # It's a global in the package: definitely not missing.

    564                     pass
    565                 elif pkg.starimports:
    566                     # It could be missing, but the package did an "import *"

    567                     # from a non-Python module, so we simply can't be sure.

    568                     maybe.append(name)
    569                 else:
    570                     # It's not a global in the package, the package didn't

    571                     # do funny star imports, it's very likely to be missing.

    572                     # The symbol could be inserted into the package from the

    573                     # outside, but since that's not good style we simply list

    574                     # it missing.

    575                     missing.append(name)
    576             else:
    577                 missing.append(name)
    578         missing.sort()
    579         maybe.sort()
    580         return missing, maybe
    581 
    582     def replace_paths_in_code(self, co):
    583         new_filename = original_filename = os.path.normpath(co.co_filename)
    584         for f, r in self.replace_paths:
    585             if original_filename.startswith(f):
    586                 new_filename = r + original_filename[len(f):]
    587                 break
    588 
    589         if self.debug and original_filename not in self.processed_paths:
    590             if new_filename != original_filename:
    591                 self.msgout(2, "co_filename %r changed to %r" \
    592                                     % (original_filename,new_filename,))
    593             else:
    594                 self.msgout(2, "co_filename %r remains unchanged" \
    595                                     % (original_filename,))
    596             self.processed_paths.append(original_filename)
    597 
    598         consts = list(co.co_consts)
    599         for i in range(len(consts)):
    600             if isinstance(consts[i], type(co)):
    601                 consts[i] = self.replace_paths_in_code(consts[i])
    602 
    603         return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
    604                          co.co_flags, co.co_code, tuple(consts), co.co_names,
    605                          co.co_varnames, new_filename, co.co_name,
    606                          co.co_firstlineno, co.co_lnotab,
    607                          co.co_freevars, co.co_cellvars)
    608 
    609 
    610 def test():
    611     # Parse command line

    612     import getopt
    613     try:
    614         opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    615     except getopt.error, msg:
    616         print msg
    617         return
    618 
    619     # Process options

    620     debug = 1
    621     domods = 0
    622     addpath = []
    623     exclude = []
    624     for o, a in opts:
    625         if o == '-d':
    626             debug = debug + 1
    627         if o == '-m':
    628             domods = 1
    629         if o == '-p':
    630             addpath = addpath + a.split(os.pathsep)
    631         if o == '-q':
    632             debug = 0
    633         if o == '-x':
    634             exclude.append(a)
    635 
    636     # Provide default arguments

    637     if not args:
    638         script = "hello.py"
    639     else:
    640         script = args[0]
    641 
    642     # Set the path based on sys.path and the script directory

    643     path = sys.path[:]
    644     path[0] = os.path.dirname(script)
    645     path = addpath + path
    646     if debug > 1:
    647         print "path:"
    648         for item in path:
    649             print "   ", repr(item)
    650 
    651     # Create the module finder and turn its crank

    652     mf = ModuleFinder(path, debug, exclude)
    653     for arg in args[1:]:
    654         if arg == '-m':
    655             domods = 1
    656             continue
    657         if domods:
    658             if arg[-2:] == '.*':
    659                 mf.import_hook(arg[:-2], None, ["*"])
    660             else:
    661                 mf.import_hook(arg)
    662         else:
    663             mf.load_file(arg)
    664     mf.run_script(script)
    665     mf.report()
    666     return mf  # for -i debugging

    667 
    668 
if __name__ == '__main__':
    # Run the command-line driver.  The finder it returns is bound at
    # module level so it stays available afterwards (e.g. "python -i").
    try:
        mf = test()
    except KeyboardInterrupt:
        # Report an interrupt with a short notice instead of a traceback.
        print "\n[interrupt]"
    674