Home | History | Annotate | Download | only in distutils
      1 """distutils.util
      2 
      3 Miscellaneous utility functions -- anything that doesn't fit into
      4 one of the other *util.py modules.
      5 """
      6 
      7 __revision__ = "$Id$"
      8 
      9 import sys, os, string, re
     10 from distutils.errors import DistutilsPlatformError
     11 from distutils.dep_util import newer
     12 from distutils.spawn import spawn
     13 from distutils import log
     14 from distutils.errors import DistutilsByteCompileError
     15 
     16 def get_platform ():
     17     """Return a string that identifies the current platform.  This is used
     18     mainly to distinguish platform-specific build directories and
     19     platform-specific built distributions.  Typically includes the OS name
     20     and version and the architecture (as supplied by 'os.uname()'),
     21     although the exact information included depends on the OS; eg. for IRIX
     22     the architecture isn't particularly important (IRIX only runs on SGI
     23     hardware), but for Linux the kernel version isn't particularly
     24     important.
     25 
     26     Examples of returned values:
     27        linux-i586
     28        linux-alpha (?)
     29        solaris-2.6-sun4u
     30        irix-5.3
     31        irix64-6.2
     32 
     33     Windows will return one of:
     34        win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
     35        win-ia64 (64bit Windows on Itanium)
     36        win32 (all others - specifically, sys.platform is returned)
     37 
     38     For other non-POSIX platforms, currently just returns 'sys.platform'.
     39     """
     40     if os.name == 'nt':
     41         # sniff sys.version for architecture.
     42         prefix = " bit ("
     43         i = string.find(sys.version, prefix)
     44         if i == -1:
     45             return sys.platform
     46         j = string.find(sys.version, ")", i)
     47         look = sys.version[i+len(prefix):j].lower()
     48         if look=='amd64':
     49             return 'win-amd64'
     50         if look=='itanium':
     51             return 'win-ia64'
     52         return sys.platform
     53 
     54     # Set for cross builds explicitly
     55     if "_PYTHON_HOST_PLATFORM" in os.environ:
     56         return os.environ["_PYTHON_HOST_PLATFORM"]
     57 
     58     if os.name != "posix" or not hasattr(os, 'uname'):
     59         # XXX what about the architecture? NT is Intel or Alpha,
     60         # Mac OS is M68k or PPC, etc.
     61         return sys.platform
     62 
     63     # Try to distinguish various flavours of Unix
     64 
     65     (osname, host, release, version, machine) = os.uname()
     66 
     67     # Convert the OS name to lowercase, remove '/' characters
     68     # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
     69     osname = string.lower(osname)
     70     osname = string.replace(osname, '/', '')
     71     machine = string.replace(machine, ' ', '_')
     72     machine = string.replace(machine, '/', '-')
     73 
     74     if osname[:5] == "linux":
     75         # At least on Linux/Intel, 'machine' is the processor --
     76         # i386, etc.
     77         # XXX what about Alpha, SPARC, etc?
     78         return  "%s-%s" % (osname, machine)
     79     elif osname[:5] == "sunos":
     80         if release[0] >= "5":           # SunOS 5 == Solaris 2
     81             osname = "solaris"
     82             release = "%d.%s" % (int(release[0]) - 3, release[2:])
     83             # We can't use "platform.architecture()[0]" because a
     84             # bootstrap problem. We use a dict to get an error
     85             # if some suspicious happens.
     86             bitness = {2147483647:"32bit", 9223372036854775807:"64bit"}
     87             machine += ".%s" % bitness[sys.maxint]
     88         # fall through to standard osname-release-machine representation
     89     elif osname[:4] == "irix":              # could be "irix64"!
     90         return "%s-%s" % (osname, release)
     91     elif osname[:3] == "aix":
     92         return "%s-%s.%s" % (osname, version, release)
     93     elif osname[:6] == "cygwin":
     94         osname = "cygwin"
     95         rel_re = re.compile (r'[\d.]+')
     96         m = rel_re.match(release)
     97         if m:
     98             release = m.group()
     99     elif osname[:6] == "darwin":
    100         import _osx_support, distutils.sysconfig
    101         osname, release, machine = _osx_support.get_platform_osx(
    102                                         distutils.sysconfig.get_config_vars(),
    103                                         osname, release, machine)
    104 
    105     return "%s-%s-%s" % (osname, release, machine)
    106 
    107 # get_platform ()
    108 
    109 
    110 def convert_path (pathname):
    111     """Return 'pathname' as a name that will work on the native filesystem,
    112     i.e. split it on '/' and put it back together again using the current
    113     directory separator.  Needed because filenames in the setup script are
    114     always supplied in Unix style, and have to be converted to the local
    115     convention before we can actually use them in the filesystem.  Raises
    116     ValueError on non-Unix-ish systems if 'pathname' either starts or
    117     ends with a slash.
    118     """
    119     if os.sep == '/':
    120         return pathname
    121     if not pathname:
    122         return pathname
    123     if pathname[0] == '/':
    124         raise ValueError, "path '%s' cannot be absolute" % pathname
    125     if pathname[-1] == '/':
    126         raise ValueError, "path '%s' cannot end with '/'" % pathname
    127 
    128     paths = string.split(pathname, '/')
    129     while '.' in paths:
    130         paths.remove('.')
    131     if not paths:
    132         return os.curdir
    133     return os.path.join(*paths)
    134 
    135 # convert_path ()
    136 
    137 
    138 def change_root (new_root, pathname):
    139     """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    140     relative, this is equivalent to "os.path.join(new_root,pathname)".
    141     Otherwise, it requires making 'pathname' relative and then joining the
    142     two, which is tricky on DOS/Windows and Mac OS.
    143     """
    144     if os.name == 'posix':
    145         if not os.path.isabs(pathname):
    146             return os.path.join(new_root, pathname)
    147         else:
    148             return os.path.join(new_root, pathname[1:])
    149 
    150     elif os.name == 'nt':
    151         (drive, path) = os.path.splitdrive(pathname)
    152         if path[0] == '\\':
    153             path = path[1:]
    154         return os.path.join(new_root, path)
    155 
    156     elif os.name == 'os2':
    157         (drive, path) = os.path.splitdrive(pathname)
    158         if path[0] == os.sep:
    159             path = path[1:]
    160         return os.path.join(new_root, path)
    161 
    162     else:
    163         raise DistutilsPlatformError, \
    164               "nothing known about platform '%s'" % os.name
    165 
    166 
    167 _environ_checked = 0
    168 def check_environ ():
    169     """Ensure that 'os.environ' has all the environment variables we
    170     guarantee that users can use in config files, command-line options,
    171     etc.  Currently this includes:
    172       HOME - user's home directory (Unix only)
    173       PLAT - description of the current platform, including hardware
    174              and OS (see 'get_platform()')
    175     """
    176     global _environ_checked
    177     if _environ_checked:
    178         return
    179 
    180     if os.name == 'posix' and 'HOME' not in os.environ:
    181         import pwd
    182         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
    183 
    184     if 'PLAT' not in os.environ:
    185         os.environ['PLAT'] = get_platform()
    186 
    187     _environ_checked = 1
    188 
    189 
    190 def subst_vars (s, local_vars):
    191     """Perform shell/Perl-style variable substitution on 'string'.  Every
    192     occurrence of '$' followed by a name is considered a variable, and
    193     variable is substituted by the value found in the 'local_vars'
    194     dictionary, or in 'os.environ' if it's not in 'local_vars'.
    195     'os.environ' is first checked/augmented to guarantee that it contains
    196     certain values: see 'check_environ()'.  Raise ValueError for any
    197     variables not found in either 'local_vars' or 'os.environ'.
    198     """
    199     check_environ()
    200     def _subst (match, local_vars=local_vars):
    201         var_name = match.group(1)
    202         if var_name in local_vars:
    203             return str(local_vars[var_name])
    204         else:
    205             return os.environ[var_name]
    206 
    207     try:
    208         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    209     except KeyError, var:
    210         raise ValueError, "invalid variable '$%s'" % var
    211 
    212 # subst_vars ()
    213 
    214 
    215 def grok_environment_error (exc, prefix="error: "):
    216     # Function kept for backward compatibility.
    217     # Used to try clever things with EnvironmentErrors,
    218     # but nowadays str(exception) produces good messages.
    219     return prefix + str(exc)
    220 
    221 
    222 # Needed by 'split_quoted()'
    223 _wordchars_re = _squote_re = _dquote_re = None
    224 def _init_regex():
    225     global _wordchars_re, _squote_re, _dquote_re
    226     _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    227     _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    228     _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
    229 
    230 def split_quoted (s):
    231     """Split a string up according to Unix shell-like rules for quotes and
    232     backslashes.  In short: words are delimited by spaces, as long as those
    233     spaces are not escaped by a backslash, or inside a quoted string.
    234     Single and double quotes are equivalent, and the quote characters can
    235     be backslash-escaped.  The backslash is stripped from any two-character
    236     escape sequence, leaving only the escaped character.  The quote
    237     characters are stripped from any quoted string.  Returns a list of
    238     words.
    239     """
    240 
    241     # This is a nice algorithm for splitting up a single string, since it
    242     # doesn't require character-by-character examination.  It was a little
    243     # bit of a brain-bender to get it working right, though...
    244     if _wordchars_re is None: _init_regex()
    245 
    246     s = string.strip(s)
    247     words = []
    248     pos = 0
    249 
    250     while s:
    251         m = _wordchars_re.match(s, pos)
    252         end = m.end()
    253         if end == len(s):
    254             words.append(s[:end])
    255             break
    256 
    257         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
    258             words.append(s[:end])       # we definitely have a word delimiter
    259             s = string.lstrip(s[end:])
    260             pos = 0
    261 
    262         elif s[end] == '\\':            # preserve whatever is being escaped;
    263                                         # will become part of the current word
    264             s = s[:end] + s[end+1:]
    265             pos = end+1
    266 
    267         else:
    268             if s[end] == "'":           # slurp singly-quoted string
    269                 m = _squote_re.match(s, end)
    270             elif s[end] == '"':         # slurp doubly-quoted string
    271                 m = _dquote_re.match(s, end)
    272             else:
    273                 raise RuntimeError, \
    274                       "this can't happen (bad char '%c')" % s[end]
    275 
    276             if m is None:
    277                 raise ValueError, \
    278                       "bad string (mismatched %s quotes?)" % s[end]
    279 
    280             (beg, end) = m.span()
    281             s = s[:beg] + s[beg+1:end-1] + s[end:]
    282             pos = m.end() - 2
    283 
    284         if pos >= len(s):
    285             words.append(s)
    286             break
    287 
    288     return words
    289 
    290 # split_quoted ()
    291 
    292 
    293 def execute (func, args, msg=None, verbose=0, dry_run=0):
    294     """Perform some action that affects the outside world (eg.  by
    295     writing to the filesystem).  Such actions are special because they
    296     are disabled by the 'dry_run' flag.  This method takes care of all
    297     that bureaucracy for you; all you have to do is supply the
    298     function to call and an argument tuple for it (to embody the
    299     "external action" being performed), and an optional message to
    300     print.
    301     """
    302     if msg is None:
    303         msg = "%s%r" % (func.__name__, args)
    304         if msg[-2:] == ',)':        # correct for singleton tuple
    305             msg = msg[0:-2] + ')'
    306 
    307     log.info(msg)
    308     if not dry_run:
    309         func(*args)
    310 
    311 
    312 def strtobool (val):
    313     """Convert a string representation of truth to true (1) or false (0).
    314 
    315     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    316     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
    317     'val' is anything else.
    318     """
    319     val = string.lower(val)
    320     if val in ('y', 'yes', 't', 'true', 'on', '1'):
    321         return 1
    322     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
    323         return 0
    324     else:
    325         raise ValueError, "invalid truth value %r" % (val,)
    326 
    327 
    328 def byte_compile (py_files,
    329                   optimize=0, force=0,
    330                   prefix=None, base_dir=None,
    331                   verbose=1, dry_run=0,
    332                   direct=None):
    333     """Byte-compile a collection of Python source files to either .pyc
    334     or .pyo files in the same directory.  'py_files' is a list of files
    335     to compile; any files that don't end in ".py" are silently skipped.
    336     'optimize' must be one of the following:
    337       0 - don't optimize (generate .pyc)
    338       1 - normal optimization (like "python -O")
    339       2 - extra optimization (like "python -OO")
    340     If 'force' is true, all files are recompiled regardless of
    341     timestamps.
    342 
    343     The source filename encoded in each bytecode file defaults to the
    344     filenames listed in 'py_files'; you can modify these with 'prefix' and
    345     'basedir'.  'prefix' is a string that will be stripped off of each
    346     source filename, and 'base_dir' is a directory name that will be
    347     prepended (after 'prefix' is stripped).  You can supply either or both
    348     (or neither) of 'prefix' and 'base_dir', as you wish.
    349 
    350     If 'dry_run' is true, doesn't actually do anything that would
    351     affect the filesystem.
    352 
    353     Byte-compilation is either done directly in this interpreter process
    354     with the standard py_compile module, or indirectly by writing a
    355     temporary script and executing it.  Normally, you should let
    356     'byte_compile()' figure out to use direct compilation or not (see
    357     the source for details).  The 'direct' flag is used by the script
    358     generated in indirect mode; unless you know what you're doing, leave
    359     it set to None.
    360     """
    361     # nothing is done if sys.dont_write_bytecode is True
    362     if sys.dont_write_bytecode:
    363         raise DistutilsByteCompileError('byte-compiling is disabled.')
    364 
    365     # First, if the caller didn't force us into direct or indirect mode,
    366     # figure out which mode we should be in.  We take a conservative
    367     # approach: choose direct mode *only* if the current interpreter is
    368     # in debug mode and optimize is 0.  If we're not in debug mode (-O
    369     # or -OO), we don't know which level of optimization this
    370     # interpreter is running with, so we can't do direct
    371     # byte-compilation and be certain that it's the right thing.  Thus,
    372     # always compile indirectly if the current interpreter is in either
    373     # optimize mode, or if either optimization level was requested by
    374     # the caller.
    375     if direct is None:
    376         direct = (__debug__ and optimize == 0)
    377 
    378     # "Indirect" byte-compilation: write a temporary script and then
    379     # run it with the appropriate flags.
    380     if not direct:
    381         try:
    382             from tempfile import mkstemp
    383             (script_fd, script_name) = mkstemp(".py")
    384         except ImportError:
    385             from tempfile import mktemp
    386             (script_fd, script_name) = None, mktemp(".py")
    387         log.info("writing byte-compilation script '%s'", script_name)
    388         if not dry_run:
    389             if script_fd is not None:
    390                 script = os.fdopen(script_fd, "w")
    391             else:
    392                 script = open(script_name, "w")
    393 
    394             script.write("""\
    395 from distutils.util import byte_compile
    396 files = [
    397 """)
    398 
    399             # XXX would be nice to write absolute filenames, just for
    400             # safety's sake (script should be more robust in the face of
    401             # chdir'ing before running it).  But this requires abspath'ing
    402             # 'prefix' as well, and that breaks the hack in build_lib's
    403             # 'byte_compile()' method that carefully tacks on a trailing
    404             # slash (os.sep really) to make sure the prefix here is "just
    405             # right".  This whole prefix business is rather delicate -- the
    406             # problem is that it's really a directory, but I'm treating it
    407             # as a dumb string, so trailing slashes and so forth matter.
    408 
    409             #py_files = map(os.path.abspath, py_files)
    410             #if prefix:
    411             #    prefix = os.path.abspath(prefix)
    412 
    413             script.write(string.join(map(repr, py_files), ",\n") + "]\n")
    414             script.write("""
    415 byte_compile(files, optimize=%r, force=%r,
    416              prefix=%r, base_dir=%r,
    417              verbose=%r, dry_run=0,
    418              direct=1)
    419 """ % (optimize, force, prefix, base_dir, verbose))
    420 
    421             script.close()
    422 
    423         cmd = [sys.executable, script_name]
    424         if optimize == 1:
    425             cmd.insert(1, "-O")
    426         elif optimize == 2:
    427             cmd.insert(1, "-OO")
    428         spawn(cmd, dry_run=dry_run)
    429         execute(os.remove, (script_name,), "removing %s" % script_name,
    430                 dry_run=dry_run)
    431 
    432     # "Direct" byte-compilation: use the py_compile module to compile
    433     # right here, right now.  Note that the script generated in indirect
    434     # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    435     # cross-process recursion.  Hey, it works!
    436     else:
    437         from py_compile import compile
    438 
    439         for file in py_files:
    440             if file[-3:] != ".py":
    441                 # This lets us be lazy and not filter filenames in
    442                 # the "install_lib" command.
    443                 continue
    444 
    445             # Terminology from the py_compile module:
    446             #   cfile - byte-compiled file
    447             #   dfile - purported source filename (same as 'file' by default)
    448             cfile = file + (__debug__ and "c" or "o")
    449             dfile = file
    450             if prefix:
    451                 if file[:len(prefix)] != prefix:
    452                     raise ValueError, \
    453                           ("invalid prefix: filename %r doesn't start with %r"
    454                            % (file, prefix))
    455                 dfile = dfile[len(prefix):]
    456             if base_dir:
    457                 dfile = os.path.join(base_dir, dfile)
    458 
    459             cfile_base = os.path.basename(cfile)
    460             if direct:
    461                 if force or newer(file, cfile):
    462                     log.info("byte-compiling %s to %s", file, cfile_base)
    463                     if not dry_run:
    464                         compile(file, cfile, dfile)
    465                 else:
    466                     log.debug("skipping byte-compilation of %s to %s",
    467                               file, cfile_base)
    468 
    469 # byte_compile ()
    470 
    471 def rfc822_escape (header):
    472     """Return a version of the string escaped for inclusion in an
    473     RFC-822 header, by ensuring there are 8 spaces space after each newline.
    474     """
    475     lines = string.split(header, '\n')
    476     header = string.join(lines, '\n' + 8*' ')
    477     return header
    478