1 """Find modules used by a script, using introspection.""" 2 # This module should be kept compatible with Python 2.2, see PEP 291. 3 4 from __future__ import generators 5 import dis 6 import imp 7 import marshal 8 import os 9 import sys 10 import types 11 import struct 12 13 if hasattr(sys.__stdout__, "newlines"): 14 READ_MODE = "U" # universal line endings 15 else: 16 # remain compatible with Python < 2.3 17 READ_MODE = "r" 18 19 LOAD_CONST = chr(dis.opname.index('LOAD_CONST')) 20 IMPORT_NAME = chr(dis.opname.index('IMPORT_NAME')) 21 STORE_NAME = chr(dis.opname.index('STORE_NAME')) 22 STORE_GLOBAL = chr(dis.opname.index('STORE_GLOBAL')) 23 STORE_OPS = [STORE_NAME, STORE_GLOBAL] 24 HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT) 25 26 # Modulefinder does a good job at simulating Python's, but it can not 27 # handle __path__ modifications packages make at runtime. Therefore there 28 # is a mechanism whereby you can register extra paths in this map for a 29 # package, and it will be honored. 30 31 # Note this is a mapping is lists of paths. 32 packagePathMap = {} 33 34 # A Public interface 35 def AddPackagePath(packagename, path): 36 paths = packagePathMap.get(packagename, []) 37 paths.append(path) 38 packagePathMap[packagename] = paths 39 40 replacePackageMap = {} 41 42 # This ReplacePackage mechanism allows modulefinder to work around the 43 # way the _xmlplus package injects itself under the name "xml" into 44 # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml") 45 # before running ModuleFinder. 46 47 def ReplacePackage(oldname, newname): 48 replacePackageMap[oldname] = newname 49 50 51 class Module: 52 53 def __init__(self, name, file=None, path=None): 54 self.__name__ = name 55 self.__file__ = file 56 self.__path__ = path 57 self.__code__ = None 58 # The set of global names that are assigned to in the module. 59 # This includes those names imported through starimports of 60 # Python modules. 61 self.globalnames = {} 62 # The set of starimports this module did that could not be 63 # resolved, ie. a starimport from a non-Python module. 64 self.starimports = {} 65 66 def __repr__(self): 67 s = "Module(%r" % (self.__name__,) 68 if self.__file__ is not None: 69 s = s + ", %r" % (self.__file__,) 70 if self.__path__ is not None: 71 s = s + ", %r" % (self.__path__,) 72 s = s + ")" 73 return s 74 75 class ModuleFinder: 76 77 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): 78 if path is None: 79 path = sys.path 80 self.path = path 81 self.modules = {} 82 self.badmodules = {} 83 self.debug = debug 84 self.indent = 0 85 self.excludes = excludes 86 self.replace_paths = replace_paths 87 self.processed_paths = [] # Used in debugging only 88 89 def msg(self, level, str, *args): 90 if level <= self.debug: 91 for i in range(self.indent): 92 print " ", 93 print str, 94 for arg in args: 95 print repr(arg), 96 print 97 98 def msgin(self, *args): 99 level = args[0] 100 if level <= self.debug: 101 self.indent = self.indent + 1 102 self.msg(*args) 103 104 def msgout(self, *args): 105 level = args[0] 106 if level <= self.debug: 107 self.indent = self.indent - 1 108 self.msg(*args) 109 110 def run_script(self, pathname): 111 self.msg(2, "run_script", pathname) 112 with open(pathname, READ_MODE) as fp: 113 stuff = ("", "r", imp.PY_SOURCE) 114 self.load_module('__main__', fp, pathname, stuff) 115 116 def load_file(self, pathname): 117 dir, name = os.path.split(pathname) 118 name, ext = os.path.splitext(name) 119 with open(pathname, READ_MODE) as fp: 120 stuff = (ext, "r", imp.PY_SOURCE) 121 self.load_module(name, fp, pathname, stuff) 122 123 def import_hook(self, name, caller=None, fromlist=None, level=-1): 124 self.msg(3, "import_hook", name, caller, fromlist, level) 125 parent = self.determine_parent(caller, level=level) 126 q, tail = self.find_head_package(parent, name) 127 m = self.load_tail(q, tail) 128 if not fromlist: 129 return q 130 if m.__path__: 131 self.ensure_fromlist(m, fromlist) 132 return None 133 134 def determine_parent(self, caller, level=-1): 135 self.msgin(4, "determine_parent", caller, level) 136 if not caller or level == 0: 137 self.msgout(4, "determine_parent -> None") 138 return None 139 pname = caller.__name__ 140 if level >= 1: # relative import 141 if caller.__path__: 142 level -= 1 143 if level == 0: 144 parent = self.modules[pname] 145 assert parent is caller 146 self.msgout(4, "determine_parent ->", parent) 147 return parent 148 if pname.count(".") < level: 149 raise ImportError, "relative importpath too deep" 150 pname = ".".join(pname.split(".")[:-level]) 151 parent = self.modules[pname] 152 self.msgout(4, "determine_parent ->", parent) 153 return parent 154 if caller.__path__: 155 parent = self.modules[pname] 156 assert caller is parent 157 self.msgout(4, "determine_parent ->", parent) 158 return parent 159 if '.' in pname: 160 i = pname.rfind('.') 161 pname = pname[:i] 162 parent = self.modules[pname] 163 assert parent.__name__ == pname 164 self.msgout(4, "determine_parent ->", parent) 165 return parent 166 self.msgout(4, "determine_parent -> None") 167 return None 168 169 def find_head_package(self, parent, name): 170 self.msgin(4, "find_head_package", parent, name) 171 if '.' in name: 172 i = name.find('.') 173 head = name[:i] 174 tail = name[i+1:] 175 else: 176 head = name 177 tail = "" 178 if parent: 179 qname = "%s.%s" % (parent.__name__, head) 180 else: 181 qname = head 182 q = self.import_module(head, qname, parent) 183 if q: 184 self.msgout(4, "find_head_package ->", (q, tail)) 185 return q, tail 186 if parent: 187 qname = head 188 parent = None 189 q = self.import_module(head, qname, parent) 190 if q: 191 self.msgout(4, "find_head_package ->", (q, tail)) 192 return q, tail 193 self.msgout(4, "raise ImportError: No module named", qname) 194 raise ImportError, "No module named " + qname 195 196 def load_tail(self, q, tail): 197 self.msgin(4, "load_tail", q, tail) 198 m = q 199 while tail: 200 i = tail.find('.') 201 if i < 0: i = len(tail) 202 head, tail = tail[:i], tail[i+1:] 203 mname = "%s.%s" % (m.__name__, head) 204 m = self.import_module(head, mname, m) 205 if not m: 206 self.msgout(4, "raise ImportError: No module named", mname) 207 raise ImportError, "No module named " + mname 208 self.msgout(4, "load_tail ->", m) 209 return m 210 211 def ensure_fromlist(self, m, fromlist, recursive=0): 212 self.msg(4, "ensure_fromlist", m, fromlist, recursive) 213 for sub in fromlist: 214 if sub == "*": 215 if not recursive: 216 all = self.find_all_submodules(m) 217 if all: 218 self.ensure_fromlist(m, all, 1) 219 elif not hasattr(m, sub): 220 subname = "%s.%s" % (m.__name__, sub) 221 submod = self.import_module(sub, subname, m) 222 if not submod: 223 raise ImportError, "No module named " + subname 224 225 def find_all_submodules(self, m): 226 if not m.__path__: 227 return 228 modules = {} 229 # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"]. 230 # But we must also collect Python extension modules - although 231 # we cannot separate normal dlls from Python extensions. 232 suffixes = [] 233 for triple in imp.get_suffixes(): 234 suffixes.append(triple[0]) 235 for dir in m.__path__: 236 try: 237 names = os.listdir(dir) 238 except os.error: 239 self.msg(2, "can't list directory", dir) 240 continue 241 for name in names: 242 mod = None 243 for suff in suffixes: 244 n = len(suff) 245 if name[-n:] == suff: 246 mod = name[:-n] 247 break 248 if mod and mod != "__init__": 249 modules[mod] = mod 250 return modules.keys() 251 252 def import_module(self, partname, fqname, parent): 253 self.msgin(3, "import_module", partname, fqname, parent) 254 try: 255 m = self.modules[fqname] 256 except KeyError: 257 pass 258 else: 259 self.msgout(3, "import_module ->", m) 260 return m 261 if fqname in self.badmodules: 262 self.msgout(3, "import_module -> None") 263 return None 264 if parent and parent.__path__ is None: 265 self.msgout(3, "import_module -> None") 266 return None 267 try: 268 fp, pathname, stuff = self.find_module(partname, 269 parent and parent.__path__, parent) 270 except ImportError: 271 self.msgout(3, "import_module ->", None) 272 return None 273 try: 274 m = self.load_module(fqname, fp, pathname, stuff) 275 finally: 276 if fp: fp.close() 277 if parent: 278 setattr(parent, partname, m) 279 self.msgout(3, "import_module ->", m) 280 return m 281 282 def load_module(self, fqname, fp, pathname, file_info): 283 suffix, mode, type = file_info 284 self.msgin(2, "load_module", fqname, fp and "fp", pathname) 285 if type == imp.PKG_DIRECTORY: 286 m = self.load_package(fqname, pathname) 287 self.msgout(2, "load_module ->", m) 288 return m 289 if type == imp.PY_SOURCE: 290 co = compile(fp.read()+'\n', pathname, 'exec') 291 elif type == imp.PY_COMPILED: 292 if fp.read(4) != imp.get_magic(): 293 self.msgout(2, "raise ImportError: Bad magic number", pathname) 294 raise ImportError, "Bad magic number in %s" % pathname 295 fp.read(4) 296 co = marshal.load(fp) 297 else: 298 co = None 299 m = self.add_module(fqname) 300 m.__file__ = pathname 301 if co: 302 if self.replace_paths: 303 co = self.replace_paths_in_code(co) 304 m.__code__ = co 305 self.scan_code(co, m) 306 self.msgout(2, "load_module ->", m) 307 return m 308 309 def _add_badmodule(self, name, caller): 310 if name not in self.badmodules: 311 self.badmodules[name] = {} 312 if caller: 313 self.badmodules[name][caller.__name__] = 1 314 else: 315 self.badmodules[name]["-"] = 1 316 317 def _safe_import_hook(self, name, caller, fromlist, level=-1): 318 # wrapper for self.import_hook() that won't raise ImportError 319 if name in self.badmodules: 320 self._add_badmodule(name, caller) 321 return 322 try: 323 self.import_hook(name, caller, level=level) 324 except ImportError, msg: 325 self.msg(2, "ImportError:", str(msg)) 326 self._add_badmodule(name, caller) 327 else: 328 if fromlist: 329 for sub in fromlist: 330 if sub in self.badmodules: 331 self._add_badmodule(sub, caller) 332 continue 333 try: 334 self.import_hook(name, caller, [sub], level=level) 335 except ImportError, msg: 336 self.msg(2, "ImportError:", str(msg)) 337 fullname = name + "." + sub 338 self._add_badmodule(fullname, caller) 339 340 def scan_opcodes(self, co, 341 unpack = struct.unpack): 342 # Scan the code, and yield 'interesting' opcode combinations 343 # Version for Python 2.4 and older 344 code = co.co_code 345 names = co.co_names 346 consts = co.co_consts 347 while code: 348 c = code[0] 349 if c in STORE_OPS: 350 oparg, = unpack('<H', code[1:3]) 351 yield "store", (names[oparg],) 352 code = code[3:] 353 continue 354 if c == LOAD_CONST and code[3] == IMPORT_NAME: 355 oparg_1, oparg_2 = unpack('<xHxH', code[:6]) 356 yield "import", (consts[oparg_1], names[oparg_2]) 357 code = code[6:] 358 continue 359 if c >= HAVE_ARGUMENT: 360 code = code[3:] 361 else: 362 code = code[1:] 363 364 def scan_opcodes_25(self, co, 365 unpack = struct.unpack): 366 # Scan the code, and yield 'interesting' opcode combinations 367 # Python 2.5 version (has absolute and relative imports) 368 code = co.co_code 369 names = co.co_names 370 consts = co.co_consts 371 LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME 372 while code: 373 c = code[0] 374 if c in STORE_OPS: 375 oparg, = unpack('<H', code[1:3]) 376 yield "store", (names[oparg],) 377 code = code[3:] 378 continue 379 if code[:9:3] == LOAD_LOAD_AND_IMPORT: 380 oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9]) 381 level = consts[oparg_1] 382 if level == -1: # normal import 383 yield "import", (consts[oparg_2], names[oparg_3]) 384 elif level == 0: # absolute import 385 yield "absolute_import", (consts[oparg_2], names[oparg_3]) 386 else: # relative import 387 yield "relative_import", (level, consts[oparg_2], names[oparg_3]) 388 code = code[9:] 389 continue 390 if c >= HAVE_ARGUMENT: 391 code = code[3:] 392 else: 393 code = code[1:] 394 395 def scan_code(self, co, m): 396 code = co.co_code 397 if sys.version_info >= (2, 5): 398 scanner = self.scan_opcodes_25 399 else: 400 scanner = self.scan_opcodes 401 for what, args in scanner(co): 402 if what == "store": 403 name, = args 404 m.globalnames[name] = 1 405 elif what in ("import", "absolute_import"): 406 fromlist, name = args 407 have_star = 0 408 if fromlist is not None: 409 if "*" in fromlist: 410 have_star = 1 411 fromlist = [f for f in fromlist if f != "*"] 412 if what == "absolute_import": level = 0 413 else: level = -1 414 self._safe_import_hook(name, m, fromlist, level=level) 415 if have_star: 416 # We've encountered an "import *". If it is a Python module, 417 # the code has already been parsed and we can suck out the 418 # global names. 419 mm = None 420 if m.__path__: 421 # At this point we don't know whether 'name' is a 422 # submodule of 'm' or a global module. Let's just try 423 # the full name first. 424 mm = self.modules.get(m.__name__ + "." + name) 425 if mm is None: 426 mm = self.modules.get(name) 427 if mm is not None: 428 m.globalnames.update(mm.globalnames) 429 m.starimports.update(mm.starimports) 430 if mm.__code__ is None: 431 m.starimports[name] = 1 432 else: 433 m.starimports[name] = 1 434 elif what == "relative_import": 435 level, fromlist, name = args 436 if name: 437 self._safe_import_hook(name, m, fromlist, level=level) 438 else: 439 parent = self.determine_parent(m, level=level) 440 self._safe_import_hook(parent.__name__, None, fromlist, level=0) 441 else: 442 # We don't expect anything else from the generator. 443 raise RuntimeError(what) 444 445 for c in co.co_consts: 446 if isinstance(c, type(co)): 447 self.scan_code(c, m) 448 449 def load_package(self, fqname, pathname): 450 self.msgin(2, "load_package", fqname, pathname) 451 newname = replacePackageMap.get(fqname) 452 if newname: 453 fqname = newname 454 m = self.add_module(fqname) 455 m.__file__ = pathname 456 m.__path__ = [pathname] 457 458 # As per comment at top of file, simulate runtime __path__ additions. 459 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) 460 461 fp, buf, stuff = self.find_module("__init__", m.__path__) 462 self.load_module(fqname, fp, buf, stuff) 463 self.msgout(2, "load_package ->", m) 464 if fp: 465 fp.close() 466 return m 467 468 def add_module(self, fqname): 469 if fqname in self.modules: 470 return self.modules[fqname] 471 self.modules[fqname] = m = Module(fqname) 472 return m 473 474 def find_module(self, name, path, parent=None): 475 if parent is not None: 476 # assert path is not None 477 fullname = parent.__name__+'.'+name 478 else: 479 fullname = name 480 if fullname in self.excludes: 481 self.msgout(3, "find_module -> Excluded", fullname) 482 raise ImportError, name 483 484 if path is None: 485 if name in sys.builtin_module_names: 486 return (None, None, ("", "", imp.C_BUILTIN)) 487 488 path = self.path 489 return imp.find_module(name, path) 490 491 def report(self): 492 """Print a report to stdout, listing the found modules with their 493 paths, as well as modules that are missing, or seem to be missing. 494 """ 495 print 496 print " %-25s %s" % ("Name", "File") 497 print " %-25s %s" % ("----", "----") 498 # Print modules found 499 keys = self.modules.keys() 500 keys.sort() 501 for key in keys: 502 m = self.modules[key] 503 if m.__path__: 504 print "P", 505 else: 506 print "m", 507 print "%-25s" % key, m.__file__ or "" 508 509 # Print missing modules 510 missing, maybe = self.any_missing_maybe() 511 if missing: 512 print 513 print "Missing modules:" 514 for name in missing: 515 mods = self.badmodules[name].keys() 516 mods.sort() 517 print "?", name, "imported from", ', '.join(mods) 518 # Print modules that may be missing, but then again, maybe not... 519 if maybe: 520 print 521 print "Submodules that appear to be missing, but could also be", 522 print "global names in the parent package:" 523 for name in maybe: 524 mods = self.badmodules[name].keys() 525 mods.sort() 526 print "?", name, "imported from", ', '.join(mods) 527 528 def any_missing(self): 529 """Return a list of modules that appear to be missing. Use 530 any_missing_maybe() if you want to know which modules are 531 certain to be missing, and which *may* be missing. 532 """ 533 missing, maybe = self.any_missing_maybe() 534 return missing + maybe 535 536 def any_missing_maybe(self): 537 """Return two lists, one with modules that are certainly missing 538 and one with modules that *may* be missing. The latter names could 539 either be submodules *or* just global names in the package. 540 541 The reason it can't always be determined is that it's impossible to 542 tell which names are imported when "from module import *" is done 543 with an extension module, short of actually importing it. 544 """ 545 missing = [] 546 maybe = [] 547 for name in self.badmodules: 548 if name in self.excludes: 549 continue 550 i = name.rfind(".") 551 if i < 0: 552 missing.append(name) 553 continue 554 subname = name[i+1:] 555 pkgname = name[:i] 556 pkg = self.modules.get(pkgname) 557 if pkg is not None: 558 if pkgname in self.badmodules[name]: 559 # The package tried to import this module itself and 560 # failed. It's definitely missing. 561 missing.append(name) 562 elif subname in pkg.globalnames: 563 # It's a global in the package: definitely not missing. 564 pass 565 elif pkg.starimports: 566 # It could be missing, but the package did an "import *" 567 # from a non-Python module, so we simply can't be sure. 568 maybe.append(name) 569 else: 570 # It's not a global in the package, the package didn't 571 # do funny star imports, it's very likely to be missing. 572 # The symbol could be inserted into the package from the 573 # outside, but since that's not good style we simply list 574 # it missing. 575 missing.append(name) 576 else: 577 missing.append(name) 578 missing.sort() 579 maybe.sort() 580 return missing, maybe 581 582 def replace_paths_in_code(self, co): 583 new_filename = original_filename = os.path.normpath(co.co_filename) 584 for f, r in self.replace_paths: 585 if original_filename.startswith(f): 586 new_filename = r + original_filename[len(f):] 587 break 588 589 if self.debug and original_filename not in self.processed_paths: 590 if new_filename != original_filename: 591 self.msgout(2, "co_filename %r changed to %r" \ 592 % (original_filename,new_filename,)) 593 else: 594 self.msgout(2, "co_filename %r remains unchanged" \ 595 % (original_filename,)) 596 self.processed_paths.append(original_filename) 597 598 consts = list(co.co_consts) 599 for i in range(len(consts)): 600 if isinstance(consts[i], type(co)): 601 consts[i] = self.replace_paths_in_code(consts[i]) 602 603 return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize, 604 co.co_flags, co.co_code, tuple(consts), co.co_names, 605 co.co_varnames, new_filename, co.co_name, 606 co.co_firstlineno, co.co_lnotab, 607 co.co_freevars, co.co_cellvars) 608 609 610 def test(): 611 # Parse command line 612 import getopt 613 try: 614 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") 615 except getopt.error, msg: 616 print msg 617 return 618 619 # Process options 620 debug = 1 621 domods = 0 622 addpath = [] 623 exclude = [] 624 for o, a in opts: 625 if o == '-d': 626 debug = debug + 1 627 if o == '-m': 628 domods = 1 629 if o == '-p': 630 addpath = addpath + a.split(os.pathsep) 631 if o == '-q': 632 debug = 0 633 if o == '-x': 634 exclude.append(a) 635 636 # Provide default arguments 637 if not args: 638 script = "hello.py" 639 else: 640 script = args[0] 641 642 # Set the path based on sys.path and the script directory 643 path = sys.path[:] 644 path[0] = os.path.dirname(script) 645 path = addpath + path 646 if debug > 1: 647 print "path:" 648 for item in path: 649 print " ", repr(item) 650 651 # Create the module finder and turn its crank 652 mf = ModuleFinder(path, debug, exclude) 653 for arg in args[1:]: 654 if arg == '-m': 655 domods = 1 656 continue 657 if domods: 658 if arg[-2:] == '.*': 659 mf.import_hook(arg[:-2], None, ["*"]) 660 else: 661 mf.import_hook(arg) 662 else: 663 mf.load_file(arg) 664 mf.run_script(script) 665 mf.report() 666 return mf # for -i debugging 667 668 669 if __name__ == '__main__': 670 try: 671 mf = test() 672 except KeyboardInterrupt: 673 print "\n[interrupt]" 674