Home | History | Annotate | Download | only in python2.7
      1 """Find modules used by a script, using introspection."""
      2 # This module should be kept compatible with Python 2.2, see PEP 291.
      3 
      4 from __future__ import generators
      5 import dis
      6 import imp
      7 import marshal
      8 import os
      9 import sys
     10 import types
     11 import struct
     12 
     13 if hasattr(sys.__stdout__, "newlines"):
     14     READ_MODE = "U"  # universal line endings
     15 else:
     16     # remain compatible with Python  < 2.3
     17     READ_MODE = "r"
     18 
     19 LOAD_CONST = chr(dis.opname.index('LOAD_CONST'))
     20 IMPORT_NAME = chr(dis.opname.index('IMPORT_NAME'))
     21 STORE_NAME = chr(dis.opname.index('STORE_NAME'))
     22 STORE_GLOBAL = chr(dis.opname.index('STORE_GLOBAL'))
     23 STORE_OPS = [STORE_NAME, STORE_GLOBAL]
     24 HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT)
     25 
# Modulefinder does a good job at simulating Python's import mechanism, but
# it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.
     30 
# Note: this maps each package name to a list of extra paths.
     32 packagePathMap = {}
     33 
     34 # A Public interface
     35 def AddPackagePath(packagename, path):
     36     paths = packagePathMap.get(packagename, [])
     37     paths.append(path)
     38     packagePathMap[packagename] = paths
     39 
     40 replacePackageMap = {}
     41 
     42 # This ReplacePackage mechanism allows modulefinder to work around the
     43 # way the _xmlplus package injects itself under the name "xml" into
     44 # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
     45 # before running ModuleFinder.
     46 
     47 def ReplacePackage(oldname, newname):
     48     replacePackageMap[oldname] = newname
     49 
     50 
     51 class Module:
     52 
     53     def __init__(self, name, file=None, path=None):
     54         self.__name__ = name
     55         self.__file__ = file
     56         self.__path__ = path
     57         self.__code__ = None
     58         # The set of global names that are assigned to in the module.
     59         # This includes those names imported through starimports of
     60         # Python modules.
     61         self.globalnames = {}
     62         # The set of starimports this module did that could not be
     63         # resolved, ie. a starimport from a non-Python module.
     64         self.starimports = {}
     65 
     66     def __repr__(self):
     67         s = "Module(%r" % (self.__name__,)
     68         if self.__file__ is not None:
     69             s = s + ", %r" % (self.__file__,)
     70         if self.__path__ is not None:
     71             s = s + ", %r" % (self.__path__,)
     72         s = s + ")"
     73         return s
     74 
     75 class ModuleFinder:
     76 
     77     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
     78         if path is None:
     79             path = sys.path
     80         self.path = path
     81         self.modules = {}
     82         self.badmodules = {}
     83         self.debug = debug
     84         self.indent = 0
     85         self.excludes = excludes
     86         self.replace_paths = replace_paths
     87         self.processed_paths = []   # Used in debugging only
     88 
    def msg(self, level, str, *args):
        """Print a debug line if `level` does not exceed self.debug.

        The line consists of one "   " chunk per current indent level,
        then `str`, then the repr() of every extra argument.
        """
        if level <= self.debug:
            for i in range(self.indent):
                print "   ",
            print str,
            for arg in args:
                print repr(arg),
            # Terminate the line started by the comma-suffixed prints above.
            print
     97 
     98     def msgin(self, *args):
     99         level = args[0]
    100         if level <= self.debug:
    101             self.indent = self.indent + 1
    102             self.msg(*args)
    103 
    104     def msgout(self, *args):
    105         level = args[0]
    106         if level <= self.debug:
    107             self.indent = self.indent - 1
    108             self.msg(*args)
    109 
    def run_script(self, pathname):
        """Load the script at `pathname` as module '__main__' and scan it.

        The file is opened with READ_MODE and is always closed again, even
        if loading raises.
        """
        self.msg(2, "run_script", pathname)
        fp = open(pathname, READ_MODE)
        try:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)
        finally:
            fp.close()
    115 
    def load_file(self, pathname):
        """Load the Python source file at `pathname` and scan it.

        The module name is the file's base name without its extension.  The
        file is opened with READ_MODE and is always closed again, even if
        loading raises.
        """
        name, ext = os.path.splitext(os.path.basename(pathname))
        fp = open(pathname, READ_MODE)
        try:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)
        finally:
            fp.close()
    122 
    123     def import_hook(self, name, caller=None, fromlist=None, level=-1):
    124         self.msg(3, "import_hook", name, caller, fromlist, level)
    125         parent = self.determine_parent(caller, level=level)
    126         q, tail = self.find_head_package(parent, name)
    127         m = self.load_tail(q, tail)
    128         if not fromlist:
    129             return q
    130         if m.__path__:
    131             self.ensure_fromlist(m, fromlist)
    132         return None
    133 
    134     def determine_parent(self, caller, level=-1):
    135         self.msgin(4, "determine_parent", caller, level)
    136         if not caller or level == 0:
    137             self.msgout(4, "determine_parent -> None")
    138             return None
    139         pname = caller.__name__
    140         if level >= 1: # relative import
    141             if caller.__path__:
    142                 level -= 1
    143             if level == 0:
    144                 parent = self.modules[pname]
    145                 assert parent is caller
    146                 self.msgout(4, "determine_parent ->", parent)
    147                 return parent
    148             if pname.count(".") < level:
    149                 raise ImportError, "relative importpath too deep"
    150             pname = ".".join(pname.split(".")[:-level])
    151             parent = self.modules[pname]
    152             self.msgout(4, "determine_parent ->", parent)
    153             return parent
    154         if caller.__path__:
    155             parent = self.modules[pname]
    156             assert caller is parent
    157             self.msgout(4, "determine_parent ->", parent)
    158             return parent
    159         if '.' in pname:
    160             i = pname.rfind('.')
    161             pname = pname[:i]
    162             parent = self.modules[pname]
    163             assert parent.__name__ == pname
    164             self.msgout(4, "determine_parent ->", parent)
    165             return parent
    166         self.msgout(4, "determine_parent -> None")
    167         return None
    168 
    169     def find_head_package(self, parent, name):
    170         self.msgin(4, "find_head_package", parent, name)
    171         if '.' in name:
    172             i = name.find('.')
    173             head = name[:i]
    174             tail = name[i+1:]
    175         else:
    176             head = name
    177             tail = ""
    178         if parent:
    179             qname = "%s.%s" % (parent.__name__, head)
    180         else:
    181             qname = head
    182         q = self.import_module(head, qname, parent)
    183         if q:
    184             self.msgout(4, "find_head_package ->", (q, tail))
    185             return q, tail
    186         if parent:
    187             qname = head
    188             parent = None
    189             q = self.import_module(head, qname, parent)
    190             if q:
    191                 self.msgout(4, "find_head_package ->", (q, tail))
    192                 return q, tail
    193         self.msgout(4, "raise ImportError: No module named", qname)
    194         raise ImportError, "No module named " + qname
    195 
    196     def load_tail(self, q, tail):
    197         self.msgin(4, "load_tail", q, tail)
    198         m = q
    199         while tail:
    200             i = tail.find('.')
    201             if i < 0: i = len(tail)
    202             head, tail = tail[:i], tail[i+1:]
    203             mname = "%s.%s" % (m.__name__, head)
    204             m = self.import_module(head, mname, m)
    205             if not m:
    206                 self.msgout(4, "raise ImportError: No module named", mname)
    207                 raise ImportError, "No module named " + mname
    208         self.msgout(4, "load_tail ->", m)
    209         return m
    210 
    211     def ensure_fromlist(self, m, fromlist, recursive=0):
    212         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
    213         for sub in fromlist:
    214             if sub == "*":
    215                 if not recursive:
    216                     all = self.find_all_submodules(m)
    217                     if all:
    218                         self.ensure_fromlist(m, all, 1)
    219             elif not hasattr(m, sub):
    220                 subname = "%s.%s" % (m.__name__, sub)
    221                 submod = self.import_module(sub, subname, m)
    222                 if not submod:
    223                     raise ImportError, "No module named " + subname
    224 
    225     def find_all_submodules(self, m):
    226         if not m.__path__:
    227             return
    228         modules = {}
    229         # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
    230         # But we must also collect Python extension modules - although
    231         # we cannot separate normal dlls from Python extensions.
    232         suffixes = []
    233         for triple in imp.get_suffixes():
    234             suffixes.append(triple[0])
    235         for dir in m.__path__:
    236             try:
    237                 names = os.listdir(dir)
    238             except os.error:
    239                 self.msg(2, "can't list directory", dir)
    240                 continue
    241             for name in names:
    242                 mod = None
    243                 for suff in suffixes:
    244                     n = len(suff)
    245                     if name[-n:] == suff:
    246                         mod = name[:-n]
    247                         break
    248                 if mod and mod != "__init__":
    249                     modules[mod] = mod
    250         return modules.keys()
    251 
    252     def import_module(self, partname, fqname, parent):
    253         self.msgin(3, "import_module", partname, fqname, parent)
    254         try:
    255             m = self.modules[fqname]
    256         except KeyError:
    257             pass
    258         else:
    259             self.msgout(3, "import_module ->", m)
    260             return m
    261         if fqname in self.badmodules:
    262             self.msgout(3, "import_module -> None")
    263             return None
    264         if parent and parent.__path__ is None:
    265             self.msgout(3, "import_module -> None")
    266             return None
    267         try:
    268             fp, pathname, stuff = self.find_module(partname,
    269                                                    parent and parent.__path__, parent)
    270         except ImportError:
    271             self.msgout(3, "import_module ->", None)
    272             return None
    273         try:
    274             m = self.load_module(fqname, fp, pathname, stuff)
    275         finally:
    276             if fp: fp.close()
    277         if parent:
    278             setattr(parent, partname, m)
    279         self.msgout(3, "import_module ->", m)
    280         return m
    281 
    282     def load_module(self, fqname, fp, pathname, file_info):
    283         suffix, mode, type = file_info
    284         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
    285         if type == imp.PKG_DIRECTORY:
    286             m = self.load_package(fqname, pathname)
    287             self.msgout(2, "load_module ->", m)
    288             return m
    289         if type == imp.PY_SOURCE:
    290             co = compile(fp.read()+'\n', pathname, 'exec')
    291         elif type == imp.PY_COMPILED:
    292             if fp.read(4) != imp.get_magic():
    293                 self.msgout(2, "raise ImportError: Bad magic number", pathname)
    294                 raise ImportError, "Bad magic number in %s" % pathname
    295             fp.read(4)
    296             co = marshal.load(fp)
    297         else:
    298             co = None
    299         m = self.add_module(fqname)
    300         m.__file__ = pathname
    301         if co:
    302             if self.replace_paths:
    303                 co = self.replace_paths_in_code(co)
    304             m.__code__ = co
    305             self.scan_code(co, m)
    306         self.msgout(2, "load_module ->", m)
    307         return m
    308 
    309     def _add_badmodule(self, name, caller):
    310         if name not in self.badmodules:
    311             self.badmodules[name] = {}
    312         if caller:
    313             self.badmodules[name][caller.__name__] = 1
    314         else:
    315             self.badmodules[name]["-"] = 1
    316 
    317     def _safe_import_hook(self, name, caller, fromlist, level=-1):
    318         # wrapper for self.import_hook() that won't raise ImportError
    319         if name in self.badmodules:
    320             self._add_badmodule(name, caller)
    321             return
    322         try:
    323             self.import_hook(name, caller, level=level)
    324         except ImportError, msg:
    325             self.msg(2, "ImportError:", str(msg))
    326             self._add_badmodule(name, caller)
    327         else:
    328             if fromlist:
    329                 for sub in fromlist:
    330                     if sub in self.badmodules:
    331                         self._add_badmodule(sub, caller)
    332                         continue
    333                     try:
    334                         self.import_hook(name, caller, [sub], level=level)
    335                     except ImportError, msg:
    336                         self.msg(2, "ImportError:", str(msg))
    337                         fullname = name + "." + sub
    338                         self._add_badmodule(fullname, caller)
    339 
    def scan_opcodes(self, co,
                     unpack = struct.unpack):
        """Yield the 'interesting' opcode combinations found in `co`.

        Version for Python 2.4 and older.  Yields ("store", (name,)) for
        each STORE_NAME/STORE_GLOBAL and ("import", (fromlist, name)) for
        each LOAD_CONST immediately followed by IMPORT_NAME.
        """
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        while code:
            c = code[0]
            if c in STORE_OPS:
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            # Slice instead of indexing so that a LOAD_CONST which happens
            # to be the last instruction cannot raise IndexError.
            if c == LOAD_CONST and code[3:4] == IMPORT_NAME:
                oparg_1, oparg_2 = unpack('<xHxH', code[:6])
                yield "import", (consts[oparg_1], names[oparg_2])
                code = code[6:]
                continue
            # Opcodes with an argument take three bytes, the others one.
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]
    363 
    def scan_opcodes_25(self, co,
                     unpack = struct.unpack):
        """Yield the 'interesting' opcode combinations found in `co`.

        Python 2.5 version (has absolute and relative imports).  Yields
        ("store", (name,)) for each STORE_NAME/STORE_GLOBAL.  For each
        LOAD_CONST, LOAD_CONST, IMPORT_NAME triple it yields, depending on
        the first constant (the import level):
          -1    -> ("import", (fromlist, name))
           0    -> ("absolute_import", (fromlist, name))
          other -> ("relative_import", (level, fromlist, name))
        """
        bytecode = co.co_code
        names = co.co_names
        consts = co.co_consts
        import_pattern = LOAD_CONST + LOAD_CONST + IMPORT_NAME
        while bytecode:
            op = bytecode[0]
            if op in STORE_OPS:
                (name_index,) = unpack('<H', bytecode[1:3])
                yield "store", (names[name_index],)
                bytecode = bytecode[3:]
            elif bytecode[:9:3] == import_pattern:
                # Every third byte of the next nine is an opcode; the bytes
                # in between are the three two-byte arguments.
                level_index, fromlist_index, name_index = unpack(
                    '<xHxHxH', bytecode[:9])
                level = consts[level_index]
                fromlist = consts[fromlist_index]
                modname = names[name_index]
                if level == -1: # normal import
                    yield "import", (fromlist, modname)
                elif level == 0: # absolute import
                    yield "absolute_import", (fromlist, modname)
                else: # relative import
                    yield "relative_import", (level, fromlist, modname)
                bytecode = bytecode[9:]
            elif op >= HAVE_ARGUMENT:
                # Opcode followed by a two-byte argument.
                bytecode = bytecode[3:]
            else:
                bytecode = bytecode[1:]
    394 
    def scan_code(self, co, m):
        """Scan code object `co` of module `m` for stores and imports.

        Global stores are recorded in m.globalnames and every import is
        followed through _safe_import_hook().  For "import *" the global
        names of the imported module are merged into `m` when that module
        was found and has code; otherwise the import is noted in
        m.starimports.  Nested code objects in co.co_consts are scanned
        recursively.
        """
        if sys.version_info >= (2, 5):
            scanner = self.scan_opcodes_25
        else:
            scanner = self.scan_opcodes
        for what, args in scanner(co):
            if what == "store":
                (name,) = args
                m.globalnames[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # No module name was given: import the names from the
                    # package the level points at.
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            elif what in ("import", "absolute_import"):
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                level = -1
                if what == "absolute_import":
                    level = 0
                self._safe_import_hook(name, m, fromlist, level=level)
                if not have_star:
                    continue
                # We've encountered an "import *". If it is a Python module,
                # the code has already been parsed and we can suck out the
                # global names.
                star_source = None
                if m.__path__:
                    # At this point we don't know whether 'name' is a
                    # submodule of 'm' or a global module. Let's just try
                    # the full name first.
                    star_source = self.modules.get(m.__name__ + "." + name)
                if star_source is None:
                    star_source = self.modules.get(name)
                if star_source is not None:
                    m.globalnames.update(star_source.globalnames)
                    m.starimports.update(star_source.starimports)
                if star_source is None or star_source.__code__ is None:
                    # Module not found, or found without code: its names
                    # are unknown.
                    m.starimports[name] = 1
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        code_type = type(co)
        for const in co.co_consts:
            if isinstance(const, code_type):
                self.scan_code(const, m)
    448 
    def load_package(self, fqname, pathname):
        """Register the package in directory `pathname` and load its __init__.

        The package is recorded under the replacement name from
        replacePackageMap when one is registered for `fqname`.  Its
        __path__ is the directory plus any extra paths registered in
        packagePathMap.  The file object found for __init__ is always
        closed, even if loading raises.  Returns the module record.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
        finally:
            # find_module() hands back an open file for source and compiled
            # modules; releasing it is our job.
            if fp:
                fp.close()
        self.msgout(2, "load_package ->", m)
        return m
    465 
    466     def add_module(self, fqname):
    467         if fqname in self.modules:
    468             return self.modules[fqname]
    469         self.modules[fqname] = m = Module(fqname)
    470         return m
    471 
    472     def find_module(self, name, path, parent=None):
    473         if parent is not None:
    474             # assert path is not None
    475             fullname = parent.__name__+'.'+name
    476         else:
    477             fullname = name
    478         if fullname in self.excludes:
    479             self.msgout(3, "find_module -> Excluded", fullname)
    480             raise ImportError, name
    481 
    482         if path is None:
    483             if name in sys.builtin_module_names:
    484                 return (None, None, ("", "", imp.C_BUILTIN))
    485 
    486             path = self.path
    487         return imp.find_module(name, path)
    488 
    489     def report(self):
    490         """Print a report to stdout, listing the found modules with their
    491         paths, as well as modules that are missing, or seem to be missing.
    492         """
    493         print
    494         print "  %-25s %s" % ("Name", "File")
    495         print "  %-25s %s" % ("----", "----")
    496         # Print modules found
    497         keys = self.modules.keys()
    498         keys.sort()
    499         for key in keys:
    500             m = self.modules[key]
    501             if m.__path__:
    502                 print "P",
    503             else:
    504                 print "m",
    505             print "%-25s" % key, m.__file__ or ""
    506 
    507         # Print missing modules
    508         missing, maybe = self.any_missing_maybe()
    509         if missing:
    510             print
    511             print "Missing modules:"
    512             for name in missing:
    513                 mods = self.badmodules[name].keys()
    514                 mods.sort()
    515                 print "?", name, "imported from", ', '.join(mods)
    516         # Print modules that may be missing, but then again, maybe not...
    517         if maybe:
    518             print
    519             print "Submodules thay appear to be missing, but could also be",
    520             print "global names in the parent package:"
    521             for name in maybe:
    522                 mods = self.badmodules[name].keys()
    523                 mods.sort()
    524                 print "?", name, "imported from", ', '.join(mods)
    525 
    526     def any_missing(self):
    527         """Return a list of modules that appear to be missing. Use
    528         any_missing_maybe() if you want to know which modules are
    529         certain to be missing, and which *may* be missing.
    530         """
    531         missing, maybe = self.any_missing_maybe()
    532         return missing + maybe
    533 
    534     def any_missing_maybe(self):
    535         """Return two lists, one with modules that are certainly missing
    536         and one with modules that *may* be missing. The latter names could
    537         either be submodules *or* just global names in the package.
    538 
    539         The reason it can't always be determined is that it's impossible to
    540         tell which names are imported when "from module import *" is done
    541         with an extension module, short of actually importing it.
    542         """
    543         missing = []
    544         maybe = []
    545         for name in self.badmodules:
    546             if name in self.excludes:
    547                 continue
    548             i = name.rfind(".")
    549             if i < 0:
    550                 missing.append(name)
    551                 continue
    552             subname = name[i+1:]
    553             pkgname = name[:i]
    554             pkg = self.modules.get(pkgname)
    555             if pkg is not None:
    556                 if pkgname in self.badmodules[name]:
    557                     # The package tried to import this module itself and
    558                     # failed. It's definitely missing.
    559                     missing.append(name)
    560                 elif subname in pkg.globalnames:
    561                     # It's a global in the package: definitely not missing.
    562                     pass
    563                 elif pkg.starimports:
    564                     # It could be missing, but the package did an "import *"
    565                     # from a non-Python module, so we simply can't be sure.
    566                     maybe.append(name)
    567                 else:
    568                     # It's not a global in the package, the package didn't
    569                     # do funny star imports, it's very likely to be missing.
    570                     # The symbol could be inserted into the package from the
    571                     # outside, but since that's not good style we simply list
    572                     # it missing.
    573                     missing.append(name)
    574             else:
    575                 missing.append(name)
    576         missing.sort()
    577         maybe.sort()
    578         return missing, maybe
    579 
    580     def replace_paths_in_code(self, co):
    581         new_filename = original_filename = os.path.normpath(co.co_filename)
    582         for f, r in self.replace_paths:
    583             if original_filename.startswith(f):
    584                 new_filename = r + original_filename[len(f):]
    585                 break
    586 
    587         if self.debug and original_filename not in self.processed_paths:
    588             if new_filename != original_filename:
    589                 self.msgout(2, "co_filename %r changed to %r" \
    590                                     % (original_filename,new_filename,))
    591             else:
    592                 self.msgout(2, "co_filename %r remains unchanged" \
    593                                     % (original_filename,))
    594             self.processed_paths.append(original_filename)
    595 
    596         consts = list(co.co_consts)
    597         for i in range(len(consts)):
    598             if isinstance(consts[i], type(co)):
    599                 consts[i] = self.replace_paths_in_code(consts[i])
    600 
    601         return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
    602                          co.co_flags, co.co_code, tuple(consts), co.co_names,
    603                          co.co_varnames, new_filename, co.co_name,
    604                          co.co_firstlineno, co.co_lnotab,
    605                          co.co_freevars, co.co_cellvars)
    606 
    607 
    608 def test():
    609     # Parse command line
    610     import getopt
    611     try:
    612         opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    613     except getopt.error, msg:
    614         print msg
    615         return
    616 
    617     # Process options
    618     debug = 1
    619     domods = 0
    620     addpath = []
    621     exclude = []
    622     for o, a in opts:
    623         if o == '-d':
    624             debug = debug + 1
    625         if o == '-m':
    626             domods = 1
    627         if o == '-p':
    628             addpath = addpath + a.split(os.pathsep)
    629         if o == '-q':
    630             debug = 0
    631         if o == '-x':
    632             exclude.append(a)
    633 
    634     # Provide default arguments
    635     if not args:
    636         script = "hello.py"
    637     else:
    638         script = args[0]
    639 
    640     # Set the path based on sys.path and the script directory
    641     path = sys.path[:]
    642     path[0] = os.path.dirname(script)
    643     path = addpath + path
    644     if debug > 1:
    645         print "path:"
    646         for item in path:
    647             print "   ", repr(item)
    648 
    649     # Create the module finder and turn its crank
    650     mf = ModuleFinder(path, debug, exclude)
    651     for arg in args[1:]:
    652         if arg == '-m':
    653             domods = 1
    654             continue
    655         if domods:
    656             if arg[-2:] == '.*':
    657                 mf.import_hook(arg[:-2], None, ["*"])
    658             else:
    659                 mf.import_hook(arg)
    660         else:
    661             mf.load_file(arg)
    662     mf.run_script(script)
    663     mf.report()
    664     return mf  # for -i debugging
    665 
    666 
if __name__ == '__main__':
    # Run the command-line driver when executed as a script.  The finder is
    # kept in the module-level name `mf`; Ctrl-C prints a short notice
    # instead of a traceback.
    try:
        mf = test()
    except KeyboardInterrupt:
        print "\n[interrupt]"
    672