Home | History | Annotate | Download | only in modulegraph
      1 """
      2 modulegraph.find_modules - High-level module dependency finding interface
      3 =========================================================================
      4 
      5 History
      6 ........
      7 
      8 Originally (loosely) based on code in py2exe's build_exe.py by Thomas Heller.
      9 """
     10 from __future__ import absolute_import
     11 
     12 import sys
     13 import os
     14 import imp
     15 import warnings
     16 
     17 import modulegraph.modulegraph as modulegraph
     18 from modulegraph.modulegraph import Alias, Script, Extension
     19 from modulegraph.util import imp_find_module
     20 
     21 __all__ = [
     22     'find_modules', 'parse_mf_results'
     23 ]
     24 
     25 def get_implies():
     26     result = {
     27         # imports done from builtin modules in C code (untrackable by modulegraph)
     28         "_curses":      ["curses"],
     29         "posix":        ["resource"],
     30         "gc":           ["time"],
     31         "time":         ["_strptime"],
     32         "datetime":     ["time"],
     33         "MacOS":        ["macresource"],
     34         "cPickle":      ["copy_reg", "cStringIO"],
     35         "parser":       ["copy_reg"],
     36         "codecs":       ["encodings"],
     37         "cStringIO":    ["copy_reg"],
     38         "_sre":         ["copy", "string", "sre"],
     39         "zipimport":    ["zlib"],
     40 
     41         # Python 3.2:
     42         "_datetime":    ["time", "_strptime"],
     43         "_json":        ["json.decoder"],
     44         "_pickle":      ["codecs", "copyreg", "_compat_pickle"],
     45         "_posixsubprocess": ["gc"],
     46         "_ssl":         ["socket"],
     47 
     48         # Python 3.3:
     49         "_elementtree": ["copy", "xml.etree.ElementPath" ],
     50 
     51         # mactoolboxglue can do a bunch more of these
     52         # that are far harder to predict, these should be tracked
     53         # manually for now.
     54 
     55         # this isn't C, but it uses __import__
     56         "anydbm":       ["dbhash", "gdbm", "dbm", "dumbdbm", "whichdb"],
     57         # package aliases
     58         "wxPython.wx":  Alias('wx'),
     59 
     60     }
     61 
     62     if sys.version_info[0] == 3:
     63         result["_sre"] = ["copy", "re"]
     64         result["parser"] = ["copyreg"]
     65 
     66         # _frozen_importlib is part of the interpreter itself
     67         result["_frozen_importlib"] = None
     68 
     69     if sys.version_info[0] == 2 and sys.version_info[1] >= 5:
     70         result.update({
     71             "email.base64MIME":         Alias("email.base64mime"),
     72             "email.Charset":            Alias("email.charset"),
     73             "email.Encoders":           Alias("email.encoders"),
     74             "email.Errors":             Alias("email.errors"),
     75             "email.Feedparser":         Alias("email.feedParser"),
     76             "email.Generator":          Alias("email.generator"),
     77             "email.Header":             Alias("email.header"),
     78             "email.Iterators":          Alias("email.iterators"),
     79             "email.Message":            Alias("email.message"),
     80             "email.Parser":             Alias("email.parser"),
     81             "email.quopriMIME":         Alias("email.quoprimime"),
     82             "email.Utils":              Alias("email.utils"),
     83             "email.MIMEAudio":          Alias("email.mime.audio"),
     84             "email.MIMEBase":           Alias("email.mime.base"),
     85             "email.MIMEImage":          Alias("email.mime.image"),
     86             "email.MIMEMessage":        Alias("email.mime.message"),
     87             "email.MIMEMultipart":      Alias("email.mime.multipart"),
     88             "email.MIMENonMultipart":   Alias("email.mime.nonmultipart"),
     89             "email.MIMEText":           Alias("email.mime.text"),
     90         })
     91 
     92     if sys.version_info[:2] >= (2, 5):
     93         result["_elementtree"] = ["pyexpat"]
     94 
     95         import xml.etree
     96         files = os.listdir(xml.etree.__path__[0])
     97         for fn in files:
     98             if fn.endswith('.py') and fn != "__init__.py":
     99                 result["_elementtree"].append("xml.etree.%s"%(fn[:-3],))
    100 
    101     if sys.version_info[:2] >= (2, 6):
    102         result['future_builtins'] = ['itertools']
    103 
    104     # os.path is an alias for a platform specific submodule,
    105     # ensure that the graph shows this.
    106     result['os.path'] = Alias(os.path.__name__)
    107 
    108 
    109     return result
    110 
    111 def parse_mf_results(mf):
    112     """
    113     Return two lists: the first one contains the python files in the graph,
    114     the second the C extensions.
    115 
    116     :param mf: a :class:`modulegraph.modulegraph.ModuleGraph` instance
    117     """
    118     #for name, imports in get_hidden_imports().items():
    119     #    if name in mf.modules.keys():
    120     #        for mod in imports:
    121     #            mf.import_hook(mod)
    122 
    123     # Retrieve modules from modulegraph
    124     py_files = []
    125     extensions = []
    126 
    127     for item in mf.flatten():
    128         # There may be __main__ modules (from mf.run_script), but
    129         # we don't need it in the zipfile we build.
    130         if item.identifier == "__main__":
    131             continue
    132         src = item.filename
    133         if src and src != '-':
    134             if isinstance(item, Script):
    135                 # Scripts are python files
    136                 py_files.append(item)
    137 
    138             elif isinstance(item, Extension):
    139                 extensions.append(item)
    140 
    141             else:
    142                 py_files.append(item)
    143 
    144     # sort on the file names, the output is nicer to read
    145     py_files.sort(key=lambda v: v.filename)
    146     extensions.sort(key=lambda v: v.filename)
    147     return py_files, extensions
    148 
    149 
    150 def plat_prepare(includes, packages, excludes):
    151     # used by Python itself
    152     includes.update(["warnings", "unicodedata", "weakref"])
    153 
    154     #if os.uname()[0] != 'java':
    155         # Jython specific imports in the stdlib:
    156         #excludes.update([
    157         #    'java.lang',
    158         #    'org.python.core',
    159         #])
    160 
    161     if not sys.platform.startswith('irix'):
    162         excludes.update([
    163             'AL',
    164             'sgi',
    165             'vms_lib',
    166         ])
    167 
    168     if not sys.platform in ('mac', 'darwin'):
    169         # XXX - this doesn't look nearly complete
    170         excludes.update([
    171             'Audio_mac',
    172             'Carbon.File',
    173             'Carbon.Folder',
    174             'Carbon.Folders',
    175             'EasyDialogs',
    176             'MacOS',
    177             'macfs',
    178             'macostools',
    179             #'macpath',
    180             '_scproxy',
    181         ])
    182 
    183     if not sys.platform == 'win32':
    184         # only win32
    185         excludes.update([
    186             #'ntpath',
    187             'nturl2path',
    188             'win32api',
    189             'win32con',
    190             'win32event',
    191             'win32evtlogutil',
    192             'win32evtlog',
    193             'win32file',
    194             'win32gui',
    195             'win32pipe',
    196             'win32process',
    197             'win32security',
    198             'pywintypes',
    199             'winsound',
    200             'win32',
    201             '_winreg',
    202             '_winapi',
    203             'msvcrt',
    204             'winreg',
    205             '_subprocess',
    206          ])
    207 
    208     if not sys.platform == 'riscos':
    209         excludes.update([
    210              'riscosenviron',
    211              #'riscospath',
    212              'rourl2path',
    213           ])
    214 
    215     if not sys.platform == 'dos' or sys.platform.startswith('ms-dos'):
    216         excludes.update([
    217             'dos',
    218         ])
    219 
    220     if not sys.platform == 'os2emx':
    221         excludes.update([
    222             #'os2emxpath',
    223             '_emx_link',
    224         ])
    225 
    226     excludes.update(set(['posix', 'nt', 'os2', 'mac', 'ce', 'riscos']) - set(sys.builtin_module_names))
    227 
    228     # Carbon.Res depends on this, but the module hasn't been present
    229     # for a while...
    230     excludes.add('OverrideFrom23')
    231     excludes.add('OverrideFrom23._Res')
    232 
    233     # import trickery in the dummy_threading module (stdlib)
    234     excludes.add('_dummy_threading')
    235 
    236     try:
    237         imp_find_module('poll')
    238     except ImportError:
    239         excludes.update([
    240             'poll',
    241         ])
    242 
    243 def find_needed_modules(mf=None, scripts=(), includes=(), packages=(), warn=warnings.warn):
    244     if mf is None:
    245         mf = modulegraph.ModuleGraph()
    246     # feed Modulefinder with everything, and return it.
    247 
    248     for path in scripts:
    249         mf.run_script(path)
    250 
    251     for mod in includes:
    252         try:
    253             if mod[-2:] == '.*':
    254                 mf.import_hook(mod[:-2], None, ['*'])
    255             else:
    256                 mf.import_hook(mod)
    257         except ImportError:
    258             warn("No module named %s"%(mod,))
    259 
    260     for f in packages:
    261         # If modulegraph has seen a reference to the package, then
    262         # we prefer to believe that (imp_find_module doesn't seem to locate
    263         # sub-packages)
    264         m = mf.findNode(f)
    265         if m is not None:
    266             path = m.packagepath[0]
    267         else:
    268             # Find path of package
    269             # TODO: use imp_find_module_or_importer
    270             try:
    271                 path = imp_find_module(f, mf.path)[1]
    272             except ImportError:
    273                 warn("No package named %s" % f)
    274                 continue
    275 
    276         # walk the path to find subdirs containing __init__.py files
    277         # scan the results (directory of __init__.py files)
    278         # first trim the path (of the head package),
    279         # then convert directory name in package name,
    280         # finally push into modulegraph.
    281         # FIXME:
    282         # 1) Needs to be adjusted for namespace packages in python 3.3
    283         # 2) Code is fairly dodgy and needs better tests
    284         for (dirpath, dirnames, filenames) in os.walk(path):
    285             if '__init__.py' in filenames and dirpath.startswith(path):
    286                 package = f + '.' + dirpath[len(path)+1:].replace(os.sep, '.')
    287                 if package.endswith('.'):
    288                     package = package[:-1]
    289                 m = mf.import_hook(package, None, ["*"])
    290             else:
    291                 # Exclude subtrees that aren't packages
    292                 dirnames[:] = []
    293 
    294 
    295     return mf
    296 
    297 #
    298 # resource constants
    299 #
    300 PY_SUFFIXES = ['.py', '.pyw', '.pyo', '.pyc']
    301 C_SUFFIXES = [
    302     _triple[0] for _triple in imp.get_suffixes()
    303     if _triple[2] == imp.C_EXTENSION
    304 ]
    305 
    306 #
    307 # side-effects
    308 #
    309 
    310 def _replacePackages():
    311     REPLACEPACKAGES = {
    312         '_xmlplus':     'xml',
    313     }
    314     for k,v in REPLACEPACKAGES.items():
    315         modulegraph.replacePackage(k, v)
    316 
    317 _replacePackages()
    318 
    319 def find_modules(scripts=(), includes=(), packages=(), excludes=(), path=None, debug=0):
    320     """
    321     High-level interface, takes iterables for:
    322         scripts, includes, packages, excludes
    323 
    324     And returns a :class:`modulegraph.modulegraph.ModuleGraph` instance,
    325     python_files, and extensions
    326 
    327     python_files is a list of pure python dependencies as modulegraph.Module objects,
    328     extensions is a list of platform-specific C extension dependencies as modulegraph.Module objects
    329     """
    330     scripts = set(scripts)
    331     includes = set(includes)
    332     packages = set(packages)
    333     excludes = set(excludes)
    334     plat_prepare(includes, packages, excludes)
    335     mf = modulegraph.ModuleGraph(
    336         path=path,
    337         excludes=(excludes - includes),
    338         implies=get_implies(),
    339         debug=debug,
    340     )
    341     find_needed_modules(mf, scripts, includes, packages)
    342     return mf
    343 
    344 def test():
    345     if '-g' in sys.argv[1:]:
    346         sys.argv.remove('-g')
    347         dograph = True
    348     else:
    349         dograph = False
    350     if '-x' in sys.argv[1:]:
    351         sys.argv.remove('-x')
    352         doxref = True
    353     else:
    354         doxref= False
    355 
    356     scripts = sys.argv[1:] or [__file__]
    357     mf = find_modules(scripts=scripts)
    358     if doxref:
    359         mf.create_xref()
    360     elif dograph:
    361         mf.graphreport()
    362     else:
    363         mf.report()
    364 
    365 if __name__ == '__main__':
    366     test()
    367