"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""

      7 __revision__ = "$Id$"
      8 
import sys, os, string, re
from distutils.errors import DistutilsPlatformError
from distutils.dep_util import newer
from distutils.spawn import spawn
from distutils import log
from distutils.errors import DistutilsByteCompileError

     16 def get_platform ():
     17     """Return a string that identifies the current platform.  This is used
     18     mainly to distinguish platform-specific build directories and
     19     platform-specific built distributions.  Typically includes the OS name
     20     and version and the architecture (as supplied by 'os.uname()'),
     21     although the exact information included depends on the OS; eg. for IRIX
     22     the architecture isn't particularly important (IRIX only runs on SGI
     23     hardware), but for Linux the kernel version isn't particularly
     24     important.
     25 
     26     Examples of returned values:
     27        linux-i586
     28        linux-alpha (?)
     29        solaris-2.6-sun4u
     30        irix-5.3
     31        irix64-6.2
     32 
     33     Windows will return one of:
     34        win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
     35        win-ia64 (64bit Windows on Itanium)
     36        win32 (all others - specifically, sys.platform is returned)
     37 
     38     For other non-POSIX platforms, currently just returns 'sys.platform'.
     39     """
     40     if os.name == 'nt':
     41         # sniff sys.version for architecture.
     42         prefix = " bit ("
     43         i = string.find(sys.version, prefix)
     44         if i == -1:
     45             return sys.platform
     46         j = string.find(sys.version, ")", i)
     47         look = sys.version[i+len(prefix):j].lower()
     48         if look=='amd64':
     49             return 'win-amd64'
     50         if look=='itanium':
     51             return 'win-ia64'
     52         return sys.platform
     53 
     54     # Set for cross builds explicitly
     55     if "_PYTHON_HOST_PLATFORM" in os.environ:
     56         return os.environ["_PYTHON_HOST_PLATFORM"]
     57 
     58     if os.name != "posix" or not hasattr(os, 'uname'):
     59         # XXX what about the architecture? NT is Intel or Alpha,
     60         # Mac OS is M68k or PPC, etc.
     61         return sys.platform
     62 
     63     # Try to distinguish various flavours of Unix
     64 
     65     (osname, host, release, version, machine) = os.uname()
     66 
     67     # Convert the OS name to lowercase, remove '/' characters
     68     # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
     69     osname = string.lower(osname)
     70     osname = string.replace(osname, '/', '')
     71     machine = string.replace(machine, ' ', '_')
     72     machine = string.replace(machine, '/', '-')
     73 
     74     if osname[:5] == "linux":
     75         # At least on Linux/Intel, 'machine' is the processor --
     76         # i386, etc.
     77         # XXX what about Alpha, SPARC, etc?
     78         return  "%s-%s" % (osname, machine)
     79     elif osname[:5] == "sunos":
     80         if release[0] >= "5":           # SunOS 5 == Solaris 2
     81             osname = "solaris"
     82             release = "%d.%s" % (int(release[0]) - 3, release[2:])
     83             # We can't use "platform.architecture()[0]" because a
     84             # bootstrap problem. We use a dict to get an error
     85             # if some suspicious happens.
     86             bitness = {2147483647:"32bit", 9223372036854775807:"64bit"}
     87             machine += ".%s" % bitness[sys.maxint]
     88         # fall through to standard osname-release-machine representation
     89     elif osname[:4] == "irix":              # could be "irix64"!
     90         return "%s-%s" % (osname, release)
     91     elif osname[:3] == "aix":
     92         return "%s-%s.%s" % (osname, version, release)
     93     elif osname[:6] == "cygwin":
     94         osname = "cygwin"
     95         rel_re = re.compile (r'[\d.]+')
     96         m = rel_re.match(release)
     97         if m:
     98             release = m.group()
     99     elif osname[:6] == "darwin":
    100         import _osx_support, distutils.sysconfig
    101         osname, release, machine = _osx_support.get_platform_osx(
    102                                         distutils.sysconfig.get_config_vars(),
    103                                         osname, release, machine)
    104 
    105     return "%s-%s-%s" % (osname, release, machine)
    106 
    107 # get_platform ()
    108 
    109 
    110 def convert_path (pathname):
    111     """Return 'pathname' as a name that will work on the native filesystem,
    112     i.e. split it on '/' and put it back together again using the current
    113     directory separator.  Needed because filenames in the setup script are
    114     always supplied in Unix style, and have to be converted to the local
    115     convention before we can actually use them in the filesystem.  Raises
    116     ValueError on non-Unix-ish systems if 'pathname' either starts or
    117     ends with a slash.
    118     """
    119     if os.sep == '/':
    120         return pathname
    121     if not pathname:
    122         return pathname
    123     if pathname[0] == '/':
    124         raise ValueError, "path '%s' cannot be absolute" % pathname
    125     if pathname[-1] == '/':
    126         raise ValueError, "path '%s' cannot end with '/'" % pathname
    127 
    128     paths = string.split(pathname, '/')
    129     while '.' in paths:
    130         paths.remove('.')
    131     if not paths:
    132         return os.curdir
    133     # On Windows, if paths is ['C:','folder','subfolder'] then
    134     # os.path.join(*paths) will return 'C:folder\subfolder' which
    135     # is thus relative to the CWD on that drive. So we work around
    136     # this by adding a \ to path[0]
    137     if (len(paths) > 0 and paths[0].endswith(':') and
    138         sys.platform == "win32" and sys.version.find("GCC") >= 0):
    139         paths[0] += '\\'
    140     return os.path.join(*paths)
    141 
    142 # convert_path ()
    143 
    144 
    145 def change_root (new_root, pathname):
    146     """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    147     relative, this is equivalent to "os.path.join(new_root,pathname)".
    148     Otherwise, it requires making 'pathname' relative and then joining the
    149     two, which is tricky on DOS/Windows and Mac OS.
    150     """
    151     if os.name == 'posix':
    152         if not os.path.isabs(pathname):
    153             return os.path.join(new_root, pathname)
    154         else:
    155             return os.path.join(new_root, pathname[1:])
    156 
    157     elif os.name == 'nt':
    158         (drive, path) = os.path.splitdrive(pathname)
    159         if path[0] == '\\':
    160             path = path[1:]
    161         return os.path.join(new_root, path)
    162 
    163     elif os.name == 'os2':
    164         (drive, path) = os.path.splitdrive(pathname)
    165         if path[0] == os.sep:
    166             path = path[1:]
    167         return os.path.join(new_root, path)
    168 
    169     else:
    170         raise DistutilsPlatformError, \
    171               "nothing known about platform '%s'" % os.name
    172 
    173 
    174 _environ_checked = 0
    175 def check_environ ():
    176     """Ensure that 'os.environ' has all the environment variables we
    177     guarantee that users can use in config files, command-line options,
    178     etc.  Currently this includes:
    179       HOME - user's home directory (Unix only)
    180       PLAT - description of the current platform, including hardware
    181              and OS (see 'get_platform()')
    182     """
    183     global _environ_checked
    184     if _environ_checked:
    185         return
    186 
    187     if os.name == 'posix' and 'HOME' not in os.environ:
    188         import pwd
    189         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
    190 
    191     if 'PLAT' not in os.environ:
    192         os.environ['PLAT'] = get_platform()
    193 
    194     _environ_checked = 1
    195 
    196 
    197 def subst_vars (s, local_vars):
    198     """Perform shell/Perl-style variable substitution on 'string'.  Every
    199     occurrence of '$' followed by a name is considered a variable, and
    200     variable is substituted by the value found in the 'local_vars'
    201     dictionary, or in 'os.environ' if it's not in 'local_vars'.
    202     'os.environ' is first checked/augmented to guarantee that it contains
    203     certain values: see 'check_environ()'.  Raise ValueError for any
    204     variables not found in either 'local_vars' or 'os.environ'.
    205     """
    206     check_environ()
    207     def _subst (match, local_vars=local_vars):
    208         var_name = match.group(1)
    209         if var_name in local_vars:
    210             return str(local_vars[var_name])
    211         else:
    212             return os.environ[var_name]
    213 
    214     try:
    215         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    216     except KeyError, var:
    217         raise ValueError, "invalid variable '$%s'" % var
    218 
    219 # subst_vars ()
    220 
    221 
    222 def grok_environment_error (exc, prefix="error: "):
    223     """Generate a useful error message from an EnvironmentError (IOError or
    224     OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    225     does what it can to deal with exception objects that don't have a
    226     filename (which happens when the error is due to a two-file operation,
    227     such as 'rename()' or 'link()'.  Returns the error message as a string
    228     prefixed with 'prefix'.
    229     """
    230     # check for Python 1.5.2-style {IO,OS}Error exception objects
    231     if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
    232         if exc.filename:
    233             error = prefix + "%s: %s" % (exc.filename, exc.strerror)
    234         else:
    235             # two-argument functions in posix module don't
    236             # include the filename in the exception object!
    237             error = prefix + "%s" % exc.strerror
    238     else:
    239         error = prefix + str(exc[-1])
    240 
    241     return error
    242 
    243 
    244 # Needed by 'split_quoted()'
    245 _wordchars_re = _squote_re = _dquote_re = None
    246 def _init_regex():
    247     global _wordchars_re, _squote_re, _dquote_re
    248     _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    249     _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    250     _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
    251 
    252 def split_quoted (s):
    253     """Split a string up according to Unix shell-like rules for quotes and
    254     backslashes.  In short: words are delimited by spaces, as long as those
    255     spaces are not escaped by a backslash, or inside a quoted string.
    256     Single and double quotes are equivalent, and the quote characters can
    257     be backslash-escaped.  The backslash is stripped from any two-character
    258     escape sequence, leaving only the escaped character.  The quote
    259     characters are stripped from any quoted string.  Returns a list of
    260     words.
    261     """
    262 
    263     # This is a nice algorithm for splitting up a single string, since it
    264     # doesn't require character-by-character examination.  It was a little
    265     # bit of a brain-bender to get it working right, though...
    266     if _wordchars_re is None: _init_regex()
    267 
    268     s = string.strip(s)
    269     words = []
    270     pos = 0
    271 
    272     while s:
    273         m = _wordchars_re.match(s, pos)
    274         end = m.end()
    275         if end == len(s):
    276             words.append(s[:end])
    277             break
    278 
    279         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
    280             words.append(s[:end])       # we definitely have a word delimiter
    281             s = string.lstrip(s[end:])
    282             pos = 0
    283 
    284         elif s[end] == '\\':            # preserve whatever is being escaped;
    285                                         # will become part of the current word
    286             s = s[:end] + s[end+1:]
    287             pos = end+1
    288 
    289         else:
    290             if s[end] == "'":           # slurp singly-quoted string
    291                 m = _squote_re.match(s, end)
    292             elif s[end] == '"':         # slurp doubly-quoted string
    293                 m = _dquote_re.match(s, end)
    294             else:
    295                 raise RuntimeError, \
    296                       "this can't happen (bad char '%c')" % s[end]
    297 
    298             if m is None:
    299                 raise ValueError, \
    300                       "bad string (mismatched %s quotes?)" % s[end]
    301 
    302             (beg, end) = m.span()
    303             s = s[:beg] + s[beg+1:end-1] + s[end:]
    304             pos = m.end() - 2
    305 
    306         if pos >= len(s):
    307             words.append(s)
    308             break
    309 
    310     return words
    311 
    312 # split_quoted ()
    313 
    314 
    315 def execute (func, args, msg=None, verbose=0, dry_run=0):
    316     """Perform some action that affects the outside world (eg.  by
    317     writing to the filesystem).  Such actions are special because they
    318     are disabled by the 'dry_run' flag.  This method takes care of all
    319     that bureaucracy for you; all you have to do is supply the
    320     function to call and an argument tuple for it (to embody the
    321     "external action" being performed), and an optional message to
    322     print.
    323     """
    324     if msg is None:
    325         msg = "%s%r" % (func.__name__, args)
    326         if msg[-2:] == ',)':        # correct for singleton tuple
    327             msg = msg[0:-2] + ')'
    328 
    329     log.info(msg)
    330     if not dry_run:
    331         func(*args)
    332 
    333 
    334 def strtobool (val):
    335     """Convert a string representation of truth to true (1) or false (0).
    336 
    337     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    338     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
    339     'val' is anything else.
    340     """
    341     val = string.lower(val)
    342     if val in ('y', 'yes', 't', 'true', 'on', '1'):
    343         return 1
    344     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
    345         return 0
    346     else:
    347         raise ValueError, "invalid truth value %r" % (val,)
    348 
    349 
    350 def byte_compile (py_files,
    351                   optimize=0, force=0,
    352                   prefix=None, base_dir=None,
    353                   verbose=1, dry_run=0,
    354                   direct=None):
    355     """Byte-compile a collection of Python source files to either .pyc
    356     or .pyo files in the same directory.  'py_files' is a list of files
    357     to compile; any files that don't end in ".py" are silently skipped.
    358     'optimize' must be one of the following:
    359       0 - don't optimize (generate .pyc)
    360       1 - normal optimization (like "python -O")
    361       2 - extra optimization (like "python -OO")
    362     If 'force' is true, all files are recompiled regardless of
    363     timestamps.
    364 
    365     The source filename encoded in each bytecode file defaults to the
    366     filenames listed in 'py_files'; you can modify these with 'prefix' and
    367     'basedir'.  'prefix' is a string that will be stripped off of each
    368     source filename, and 'base_dir' is a directory name that will be
    369     prepended (after 'prefix' is stripped).  You can supply either or both
    370     (or neither) of 'prefix' and 'base_dir', as you wish.
    371 
    372     If 'dry_run' is true, doesn't actually do anything that would
    373     affect the filesystem.
    374 
    375     Byte-compilation is either done directly in this interpreter process
    376     with the standard py_compile module, or indirectly by writing a
    377     temporary script and executing it.  Normally, you should let
    378     'byte_compile()' figure out to use direct compilation or not (see
    379     the source for details).  The 'direct' flag is used by the script
    380     generated in indirect mode; unless you know what you're doing, leave
    381     it set to None.
    382     """
    383     # nothing is done if sys.dont_write_bytecode is True
    384     if sys.dont_write_bytecode:
    385         raise DistutilsByteCompileError('byte-compiling is disabled.')
    386 
    387     # First, if the caller didn't force us into direct or indirect mode,
    388     # figure out which mode we should be in.  We take a conservative
    389     # approach: choose direct mode *only* if the current interpreter is
    390     # in debug mode and optimize is 0.  If we're not in debug mode (-O
    391     # or -OO), we don't know which level of optimization this
    392     # interpreter is running with, so we can't do direct
    393     # byte-compilation and be certain that it's the right thing.  Thus,
    394     # always compile indirectly if the current interpreter is in either
    395     # optimize mode, or if either optimization level was requested by
    396     # the caller.
    397     if direct is None:
    398         direct = (__debug__ and optimize == 0)
    399 
    400     # "Indirect" byte-compilation: write a temporary script and then
    401     # run it with the appropriate flags.
    402     if not direct:
    403         try:
    404             from tempfile import mkstemp
    405             (script_fd, script_name) = mkstemp(".py")
    406         except ImportError:
    407             from tempfile import mktemp
    408             (script_fd, script_name) = None, mktemp(".py")
    409         log.info("writing byte-compilation script '%s'", script_name)
    410         if not dry_run:
    411             if script_fd is not None:
    412                 script = os.fdopen(script_fd, "w")
    413             else:
    414                 script = open(script_name, "w")
    415 
    416             script.write("""\
    417 from distutils.util import byte_compile
    418 files = [
    419 """)
    420 
    421             # XXX would be nice to write absolute filenames, just for
    422             # safety's sake (script should be more robust in the face of
    423             # chdir'ing before running it).  But this requires abspath'ing
    424             # 'prefix' as well, and that breaks the hack in build_lib's
    425             # 'byte_compile()' method that carefully tacks on a trailing
    426             # slash (os.sep really) to make sure the prefix here is "just
    427             # right".  This whole prefix business is rather delicate -- the
    428             # problem is that it's really a directory, but I'm treating it
    429             # as a dumb string, so trailing slashes and so forth matter.
    430 
    431             #py_files = map(os.path.abspath, py_files)
    432             #if prefix:
    433             #    prefix = os.path.abspath(prefix)
    434 
    435             script.write(string.join(map(repr, py_files), ",\n") + "]\n")
    436             script.write("""
    437 byte_compile(files, optimize=%r, force=%r,
    438              prefix=%r, base_dir=%r,
    439              verbose=%r, dry_run=0,
    440              direct=1)
    441 """ % (optimize, force, prefix, base_dir, verbose))
    442 
    443             script.close()
    444 
    445         cmd = [sys.executable, script_name]
    446         if optimize == 1:
    447             cmd.insert(1, "-O")
    448         elif optimize == 2:
    449             cmd.insert(1, "-OO")
    450         spawn(cmd, dry_run=dry_run)
    451         execute(os.remove, (script_name,), "removing %s" % script_name,
    452                 dry_run=dry_run)
    453 
    454     # "Direct" byte-compilation: use the py_compile module to compile
    455     # right here, right now.  Note that the script generated in indirect
    456     # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    457     # cross-process recursion.  Hey, it works!
    458     else:
    459         from py_compile import compile
    460 
    461         for file in py_files:
    462             if file[-3:] != ".py":
    463                 # This lets us be lazy and not filter filenames in
    464                 # the "install_lib" command.
    465                 continue
    466 
    467             # Terminology from the py_compile module:
    468             #   cfile - byte-compiled file
    469             #   dfile - purported source filename (same as 'file' by default)
    470             cfile = file + (__debug__ and "c" or "o")
    471             dfile = file
    472             if prefix:
    473                 if file[:len(prefix)] != prefix:
    474                     raise ValueError, \
    475                           ("invalid prefix: filename %r doesn't start with %r"
    476                            % (file, prefix))
    477                 dfile = dfile[len(prefix):]
    478             if base_dir:
    479                 dfile = os.path.join(base_dir, dfile)
    480 
    481             cfile_base = os.path.basename(cfile)
    482             if direct:
    483                 if force or newer(file, cfile):
    484                     log.info("byte-compiling %s to %s", file, cfile_base)
    485                     if not dry_run:
    486                         compile(file, cfile, dfile)
    487                 else:
    488                     log.debug("skipping byte-compilation of %s to %s",
    489                               file, cfile_base)
    490 
    491 # byte_compile ()
    492 
    493 def rfc822_escape (header):
    494     """Return a version of the string escaped for inclusion in an
    495     RFC-822 header, by ensuring there are 8 spaces space after each newline.
    496     """
    497     lines = string.split(header, '\n')
    498     header = string.join(lines, '\n' + 8*' ')
    499     return header
    500