1 """ 2 Import utilities 3 4 Exported classes: 5 ImportManager Manage the import process 6 7 Importer Base class for replacing standard import functions 8 BuiltinImporter Emulate the import mechanism for builtin and frozen modules 9 10 DynLoadSuffixImporter 11 """ 12 from warnings import warnpy3k 13 warnpy3k("the imputil module has been removed in Python 3.0", stacklevel=2) 14 del warnpy3k 15 16 # note: avoid importing non-builtin modules 17 import imp ### not available in Jython? 18 import sys 19 import __builtin__ 20 21 # for the DirectoryImporter 22 import struct 23 import marshal 24 25 __all__ = ["ImportManager","Importer","BuiltinImporter"] 26 27 _StringType = type('') 28 _ModuleType = type(sys) ### doesn't work in Jython... 29 30 class ImportManager: 31 "Manage the import process." 32 33 def install(self, namespace=vars(__builtin__)): 34 "Install this ImportManager into the specified namespace." 35 36 if isinstance(namespace, _ModuleType): 37 namespace = vars(namespace) 38 39 # Note: we have no notion of "chaining" 40 41 # Record the previous import hook, then install our own. 42 self.previous_importer = namespace['__import__'] 43 self.namespace = namespace 44 namespace['__import__'] = self._import_hook 45 46 ### fix this 47 #namespace['reload'] = self._reload_hook 48 49 def uninstall(self): 50 "Restore the previous import mechanism." 51 self.namespace['__import__'] = self.previous_importer 52 53 def add_suffix(self, suffix, importFunc): 54 assert hasattr(importFunc, '__call__') 55 self.fs_imp.add_suffix(suffix, importFunc) 56 57 ###################################################################### 58 # 59 # PRIVATE METHODS 60 # 61 62 clsFilesystemImporter = None 63 64 def __init__(self, fs_imp=None): 65 # we're definitely going to be importing something in the future, 66 # so let's just load the OS-related facilities. 67 if not _os_stat: 68 _os_bootstrap() 69 70 # This is the Importer that we use for grabbing stuff from the 71 # filesystem. It defines one more method (import_from_dir) for our use. 72 if fs_imp is None: 73 cls = self.clsFilesystemImporter or _FilesystemImporter 74 fs_imp = cls() 75 self.fs_imp = fs_imp 76 77 # Initialize the set of suffixes that we recognize and import. 78 # The default will import dynamic-load modules first, followed by 79 # .py files (or a .py file's cached bytecode) 80 for desc in imp.get_suffixes(): 81 if desc[2] == imp.C_EXTENSION: 82 self.add_suffix(desc[0], 83 DynLoadSuffixImporter(desc).import_file) 84 self.add_suffix('.py', py_suffix_importer) 85 86 def _import_hook(self, fqname, globals=None, locals=None, fromlist=None): 87 """Python calls this hook to locate and import a module.""" 88 89 parts = fqname.split('.') 90 91 # determine the context of this import 92 parent = self._determine_import_context(globals) 93 94 # if there is a parent, then its importer should manage this import 95 if parent: 96 module = parent.__importer__._do_import(parent, parts, fromlist) 97 if module: 98 return module 99 100 # has the top module already been imported? 101 try: 102 top_module = sys.modules[parts[0]] 103 except KeyError: 104 105 # look for the topmost module 106 top_module = self._import_top_module(parts[0]) 107 if not top_module: 108 # the topmost module wasn't found at all. 109 raise ImportError, 'No module named ' + fqname 110 111 # fast-path simple imports 112 if len(parts) == 1: 113 if not fromlist: 114 return top_module 115 116 if not top_module.__dict__.get('__ispkg__'): 117 # __ispkg__ isn't defined (the module was not imported by us), 118 # or it is zero. 119 # 120 # In the former case, there is no way that we could import 121 # sub-modules that occur in the fromlist (but we can't raise an 122 # error because it may just be names) because we don't know how 123 # to deal with packages that were imported by other systems. 124 # 125 # In the latter case (__ispkg__ == 0), there can't be any sub- 126 # modules present, so we can just return. 127 # 128 # In both cases, since len(parts) == 1, the top_module is also 129 # the "bottom" which is the defined return when a fromlist 130 # exists. 131 return top_module 132 133 importer = top_module.__dict__.get('__importer__') 134 if importer: 135 return importer._finish_import(top_module, parts[1:], fromlist) 136 137 # Grrr, some people "import os.path" or do "from os.path import ..." 138 if len(parts) == 2 and hasattr(top_module, parts[1]): 139 if fromlist: 140 return getattr(top_module, parts[1]) 141 else: 142 return top_module 143 144 # If the importer does not exist, then we have to bail. A missing 145 # importer means that something else imported the module, and we have 146 # no knowledge of how to get sub-modules out of the thing. 147 raise ImportError, 'No module named ' + fqname 148 149 def _determine_import_context(self, globals): 150 """Returns the context in which a module should be imported. 151 152 The context could be a loaded (package) module and the imported module 153 will be looked for within that package. The context could also be None, 154 meaning there is no context -- the module should be looked for as a 155 "top-level" module. 156 """ 157 158 if not globals or not globals.get('__importer__'): 159 # globals does not refer to one of our modules or packages. That 160 # implies there is no relative import context (as far as we are 161 # concerned), and it should just pick it off the standard path. 162 return None 163 164 # The globals refer to a module or package of ours. It will define 165 # the context of the new import. Get the module/package fqname. 166 parent_fqname = globals['__name__'] 167 168 # if a package is performing the import, then return itself (imports 169 # refer to pkg contents) 170 if globals['__ispkg__']: 171 parent = sys.modules[parent_fqname] 172 assert globals is parent.__dict__ 173 return parent 174 175 i = parent_fqname.rfind('.') 176 177 # a module outside of a package has no particular import context 178 if i == -1: 179 return None 180 181 # if a module in a package is performing the import, then return the 182 # package (imports refer to siblings) 183 parent_fqname = parent_fqname[:i] 184 parent = sys.modules[parent_fqname] 185 assert parent.__name__ == parent_fqname 186 return parent 187 188 def _import_top_module(self, name): 189 # scan sys.path looking for a location in the filesystem that contains 190 # the module, or an Importer object that can import the module. 191 for item in sys.path: 192 if isinstance(item, _StringType): 193 module = self.fs_imp.import_from_dir(item, name) 194 else: 195 module = item.import_top(name) 196 if module: 197 return module 198 return None 199 200 def _reload_hook(self, module): 201 "Python calls this hook to reload a module." 202 203 # reloading of a module may or may not be possible (depending on the 204 # importer), but at least we can validate that it's ours to reload 205 importer = module.__dict__.get('__importer__') 206 if not importer: 207 ### oops. now what... 208 pass 209 210 # okay. it is using the imputil system, and we must delegate it, but 211 # we don't know what to do (yet) 212 ### we should blast the module dict and do another get_code(). need to 213 ### flesh this out and add proper docco... 214 raise SystemError, "reload not yet implemented" 215 216 217 class Importer: 218 "Base class for replacing standard import functions." 219 220 def import_top(self, name): 221 "Import a top-level module." 222 return self._import_one(None, name, name) 223 224 ###################################################################### 225 # 226 # PRIVATE METHODS 227 # 228 def _finish_import(self, top, parts, fromlist): 229 # if "a.b.c" was provided, then load the ".b.c" portion down from 230 # below the top-level module. 231 bottom = self._load_tail(top, parts) 232 233 # if the form is "import a.b.c", then return "a" 234 if not fromlist: 235 # no fromlist: return the top of the import tree 236 return top 237 238 # the top module was imported by self. 239 # 240 # this means that the bottom module was also imported by self (just 241 # now, or in the past and we fetched it from sys.modules). 242 # 243 # since we imported/handled the bottom module, this means that we can 244 # also handle its fromlist (and reliably use __ispkg__). 245 246 # if the bottom node is a package, then (potentially) import some 247 # modules. 248 # 249 # note: if it is not a package, then "fromlist" refers to names in 250 # the bottom module rather than modules. 251 # note: for a mix of names and modules in the fromlist, we will 252 # import all modules and insert those into the namespace of 253 # the package module. Python will pick up all fromlist names 254 # from the bottom (package) module; some will be modules that 255 # we imported and stored in the namespace, others are expected 256 # to be present already. 257 if bottom.__ispkg__: 258 self._import_fromlist(bottom, fromlist) 259 260 # if the form is "from a.b import c, d" then return "b" 261 return bottom 262 263 def _import_one(self, parent, modname, fqname): 264 "Import a single module." 265 266 # has the module already been imported? 267 try: 268 return sys.modules[fqname] 269 except KeyError: 270 pass 271 272 # load the module's code, or fetch the module itself 273 result = self.get_code(parent, modname, fqname) 274 if result is None: 275 return None 276 277 module = self._process_result(result, fqname) 278 279 # insert the module into its parent 280 if parent: 281 setattr(parent, modname, module) 282 return module 283 284 def _process_result(self, result, fqname): 285 ispkg, code, values = result 286 # did get_code() return an actual module? (rather than a code object) 287 is_module = isinstance(code, _ModuleType) 288 289 # use the returned module, or create a new one to exec code into 290 if is_module: 291 module = code 292 else: 293 module = imp.new_module(fqname) 294 295 ### record packages a bit differently?? 296 module.__importer__ = self 297 module.__ispkg__ = ispkg 298 299 # insert additional values into the module (before executing the code) 300 module.__dict__.update(values) 301 302 # the module is almost ready... make it visible 303 sys.modules[fqname] = module 304 305 # execute the code within the module's namespace 306 if not is_module: 307 try: 308 exec code in module.__dict__ 309 except: 310 if fqname in sys.modules: 311 del sys.modules[fqname] 312 raise 313 314 # fetch from sys.modules instead of returning module directly. 315 # also make module's __name__ agree with fqname, in case 316 # the "exec code in module.__dict__" played games on us. 317 module = sys.modules[fqname] 318 module.__name__ = fqname 319 return module 320 321 def _load_tail(self, m, parts): 322 """Import the rest of the modules, down from the top-level module. 323 324 Returns the last module in the dotted list of modules. 325 """ 326 for part in parts: 327 fqname = "%s.%s" % (m.__name__, part) 328 m = self._import_one(m, part, fqname) 329 if not m: 330 raise ImportError, "No module named " + fqname 331 return m 332 333 def _import_fromlist(self, package, fromlist): 334 'Import any sub-modules in the "from" list.' 335 336 # if '*' is present in the fromlist, then look for the '__all__' 337 # variable to find additional items (modules) to import. 338 if '*' in fromlist: 339 fromlist = list(fromlist) + \ 340 list(package.__dict__.get('__all__', [])) 341 342 for sub in fromlist: 343 # if the name is already present, then don't try to import it (it 344 # might not be a module!). 345 if sub != '*' and not hasattr(package, sub): 346 subname = "%s.%s" % (package.__name__, sub) 347 submod = self._import_one(package, sub, subname) 348 if not submod: 349 raise ImportError, "cannot import name " + subname 350 351 def _do_import(self, parent, parts, fromlist): 352 """Attempt to import the module relative to parent. 353 354 This method is used when the import context specifies that <self> 355 imported the parent module. 356 """ 357 top_name = parts[0] 358 top_fqname = parent.__name__ + '.' + top_name 359 top_module = self._import_one(parent, top_name, top_fqname) 360 if not top_module: 361 # this importer and parent could not find the module (relatively) 362 return None 363 364 return self._finish_import(top_module, parts[1:], fromlist) 365 366 ###################################################################### 367 # 368 # METHODS TO OVERRIDE 369 # 370 def get_code(self, parent, modname, fqname): 371 """Find and retrieve the code for the given module. 372 373 parent specifies a parent module to define a context for importing. It 374 may be None, indicating no particular context for the search. 375 376 modname specifies a single module (not dotted) within the parent. 377 378 fqname specifies the fully-qualified module name. This is a 379 (potentially) dotted name from the "root" of the module namespace 380 down to the modname. 381 If there is no parent, then modname==fqname. 382 383 This method should return None, or a 3-tuple. 384 385 * If the module was not found, then None should be returned. 386 387 * The first item of the 2- or 3-tuple should be the integer 0 or 1, 388 specifying whether the module that was found is a package or not. 389 390 * The second item is the code object for the module (it will be 391 executed within the new module's namespace). This item can also 392 be a fully-loaded module object (e.g. loaded from a shared lib). 393 394 * The third item is a dictionary of name/value pairs that will be 395 inserted into new module before the code object is executed. This 396 is provided in case the module's code expects certain values (such 397 as where the module was found). When the second item is a module 398 object, then these names/values will be inserted *after* the module 399 has been loaded/initialized. 400 """ 401 raise RuntimeError, "get_code not implemented" 402 403 404 ###################################################################### 405 # 406 # Some handy stuff for the Importers 407 # 408 409 # byte-compiled file suffix character 410 _suffix_char = __debug__ and 'c' or 'o' 411 412 # byte-compiled file suffix 413 _suffix = '.py' + _suffix_char 414 415 def _compile(pathname, timestamp): 416 """Compile (and cache) a Python source file. 417 418 The file specified by <pathname> is compiled to a code object and 419 returned. 420 421 Presuming the appropriate privileges exist, the bytecodes will be 422 saved back to the filesystem for future imports. The source file's 423 modification timestamp must be provided as a Long value. 424 """ 425 codestring = open(pathname, 'rU').read() 426 if codestring and codestring[-1] != '\n': 427 codestring = codestring + '\n' 428 code = __builtin__.compile(codestring, pathname, 'exec') 429 430 # try to cache the compiled code 431 try: 432 f = open(pathname + _suffix_char, 'wb') 433 except IOError: 434 pass 435 else: 436 f.write('\0\0\0\0') 437 f.write(struct.pack('<I', timestamp)) 438 marshal.dump(code, f) 439 f.flush() 440 f.seek(0, 0) 441 f.write(imp.get_magic()) 442 f.close() 443 444 return code 445 446 _os_stat = _os_path_join = None 447 def _os_bootstrap(): 448 "Set up 'os' module replacement functions for use during import bootstrap." 449 450 names = sys.builtin_module_names 451 452 join = None 453 if 'posix' in names: 454 sep = '/' 455 from posix import stat 456 elif 'nt' in names: 457 sep = '\\' 458 from nt import stat 459 elif 'dos' in names: 460 sep = '\\' 461 from dos import stat 462 elif 'os2' in names: 463 sep = '\\' 464 from os2 import stat 465 else: 466 raise ImportError, 'no os specific module found' 467 468 if join is None: 469 def join(a, b, sep=sep): 470 if a == '': 471 return b 472 lastchar = a[-1:] 473 if lastchar == '/' or lastchar == sep: 474 return a + b 475 return a + sep + b 476 477 global _os_stat 478 _os_stat = stat 479 480 global _os_path_join 481 _os_path_join = join 482 483 def _os_path_isdir(pathname): 484 "Local replacement for os.path.isdir()." 485 try: 486 s = _os_stat(pathname) 487 except OSError: 488 return None 489 return (s.st_mode & 0170000) == 0040000 490 491 def _timestamp(pathname): 492 "Return the file modification time as a Long." 493 try: 494 s = _os_stat(pathname) 495 except OSError: 496 return None 497 return long(s.st_mtime) 498 499 500 ###################################################################### 501 # 502 # Emulate the import mechanism for builtin and frozen modules 503 # 504 class BuiltinImporter(Importer): 505 def get_code(self, parent, modname, fqname): 506 if parent: 507 # these modules definitely do not occur within a package context 508 return None 509 510 # look for the module 511 if imp.is_builtin(modname): 512 type = imp.C_BUILTIN 513 elif imp.is_frozen(modname): 514 type = imp.PY_FROZEN 515 else: 516 # not found 517 return None 518 519 # got it. now load and return it. 520 module = imp.load_module(modname, None, modname, ('', '', type)) 521 return 0, module, { } 522 523 524 ###################################################################### 525 # 526 # Internal importer used for importing from the filesystem 527 # 528 class _FilesystemImporter(Importer): 529 def __init__(self): 530 self.suffixes = [ ] 531 532 def add_suffix(self, suffix, importFunc): 533 assert hasattr(importFunc, '__call__') 534 self.suffixes.append((suffix, importFunc)) 535 536 def import_from_dir(self, dir, fqname): 537 result = self._import_pathname(_os_path_join(dir, fqname), fqname) 538 if result: 539 return self._process_result(result, fqname) 540 return None 541 542 def get_code(self, parent, modname, fqname): 543 # This importer is never used with an empty parent. Its existence is 544 # private to the ImportManager. The ImportManager uses the 545 # import_from_dir() method to import top-level modules/packages. 546 # This method is only used when we look for a module within a package. 547 assert parent 548 549 for submodule_path in parent.__path__: 550 code = self._import_pathname(_os_path_join(submodule_path, modname), fqname) 551 if code is not None: 552 return code 553 return self._import_pathname(_os_path_join(parent.__pkgdir__, modname), 554 fqname) 555 556 def _import_pathname(self, pathname, fqname): 557 if _os_path_isdir(pathname): 558 result = self._import_pathname(_os_path_join(pathname, '__init__'), 559 fqname) 560 if result: 561 values = result[2] 562 values['__pkgdir__'] = pathname 563 values['__path__'] = [ pathname ] 564 return 1, result[1], values 565 return None 566 567 for suffix, importFunc in self.suffixes: 568 filename = pathname + suffix 569 try: 570 finfo = _os_stat(filename) 571 except OSError: 572 pass 573 else: 574 return importFunc(filename, finfo, fqname) 575 return None 576 577 ###################################################################### 578 # 579 # SUFFIX-BASED IMPORTERS 580 # 581 582 def py_suffix_importer(filename, finfo, fqname): 583 file = filename[:-3] + _suffix 584 t_py = long(finfo[8]) 585 t_pyc = _timestamp(file) 586 587 code = None 588 if t_pyc is not None and t_pyc >= t_py: 589 f = open(file, 'rb') 590 if f.read(4) == imp.get_magic(): 591 t = struct.unpack('<I', f.read(4))[0] 592 if t == t_py: 593 code = marshal.load(f) 594 f.close() 595 if code is None: 596 file = filename 597 code = _compile(file, t_py) 598 599 return 0, code, { '__file__' : file } 600 601 class DynLoadSuffixImporter: 602 def __init__(self, desc): 603 self.desc = desc 604 605 def import_file(self, filename, finfo, fqname): 606 fp = open(filename, self.desc[1]) 607 module = imp.load_module(fqname, fp, filename, self.desc) 608 module.__file__ = filename 609 return 0, module, { } 610 611 612 ###################################################################### 613 614 def _print_importers(): 615 items = sys.modules.items() 616 items.sort() 617 for name, module in items: 618 if module: 619 print name, module.__dict__.get('__importer__', '-- no importer') 620 else: 621 print name, '-- non-existent module' 622 623 def _test_revamp(): 624 ImportManager().install() 625 sys.path.insert(0, BuiltinImporter()) 626 627 ###################################################################### 628 629 # 630 # TODO 631 # 632 # from Finn Bock: 633 # type(sys) is not a module in Jython. what to use instead? 634 # imp.C_EXTENSION is not in Jython. same for get_suffixes and new_module 635 # 636 # given foo.py of: 637 # import sys 638 # sys.modules['foo'] = sys 639 # 640 # ---- standard import mechanism 641 # >>> import foo 642 # >>> foo 643 # <module 'sys' (built-in)> 644 # 645 # ---- revamped import mechanism 646 # >>> import imputil 647 # >>> imputil._test_revamp() 648 # >>> import foo 649 # >>> foo 650 # <module 'foo' from 'foo.py'> 651 # 652 # 653 # from MAL: 654 # should BuiltinImporter exist in sys.path or hard-wired in ImportManager? 655 # need __path__ processing 656 # performance 657 # move chaining to a subclass [gjs: it's been nuked] 658 # deinstall should be possible 659 # query mechanism needed: is a specific Importer installed? 660 # py/pyc/pyo piping hooks to filter/process these files 661 # wish list: 662 # distutils importer hooked to list of standard Internet repositories 663 # module->file location mapper to speed FS-based imports 664 # relative imports 665 # keep chaining so that it can play nice with other import hooks 666 # 667 # from Gordon: 668 # push MAL's mapper into sys.path[0] as a cache (hard-coded for apps) 669 # 670 # from Guido: 671 # need to change sys.* references for rexec environs 672 # need hook for MAL's walk-me-up import strategy, or Tim's absolute strategy 673 # watch out for sys.modules[...] is None 674 # flag to force absolute imports? (speeds _determine_import_context and 675 # checking for a relative module) 676 # insert names of archives into sys.path (see quote below) 677 # note: reload does NOT blast module dict 678 # shift import mechanisms and policies around; provide for hooks, overrides 679 # (see quote below) 680 # add get_source stuff 681 # get_topcode and get_subcode 682 # CRLF handling in _compile 683 # race condition in _compile 684 # refactoring of os.py to deal with _os_bootstrap problem 685 # any special handling to do for importing a module with a SyntaxError? 686 # (e.g. clean up the traceback) 687 # implement "domain" for path-type functionality using pkg namespace 688 # (rather than FS-names like __path__) 689 # don't use the word "private"... maybe "internal" 690 # 691 # 692 # Guido's comments on sys.path caching: 693 # 694 # We could cache this in a dictionary: the ImportManager can have a 695 # cache dict mapping pathnames to importer objects, and a separate 696 # method for coming up with an importer given a pathname that's not yet 697 # in the cache. The method should do a stat and/or look at the 698 # extension to decide which importer class to use; you can register new 699 # importer classes by registering a suffix or a Boolean function, plus a 700 # class. If you register a new importer class, the cache is zapped. 701 # The cache is independent from sys.path (but maintained per 702 # ImportManager instance) so that rearrangements of sys.path do the 703 # right thing. If a path is dropped from sys.path the corresponding 704 # cache entry is simply no longer used. 705 # 706 # My/Guido's comments on factoring ImportManager and Importer: 707 # 708 # > However, we still have a tension occurring here: 709 # > 710 # > 1) implementing policy in ImportManager assists in single-point policy 711 # > changes for app/rexec situations 712 # > 2) implementing policy in Importer assists in package-private policy 713 # > changes for normal, operating conditions 714 # > 715 # > I'll see if I can sort out a way to do this. Maybe the Importer class will 716 # > implement the methods (which can be overridden to change policy) by 717 # > delegating to ImportManager. 718 # 719 # Maybe also think about what kind of policies an Importer would be 720 # likely to want to change. I have a feeling that a lot of the code 721 # there is actually not so much policy but a *necessity* to get things 722 # working given the calling conventions for the __import__ hook: whether 723 # to return the head or tail of a dotted name, or when to do the "finish 724 # fromlist" stuff. 725 # 726