"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""

__revision__ = "$Id$"

import sys, os, string, re
from distutils.errors import DistutilsPlatformError
from distutils.dep_util import newer
from distutils.spawn import spawn
from distutils import log
from distutils.errors import DistutilsByteCompileError


def get_platform():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64, aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.
    The environment variable '_PYTHON_HOST_PLATFORM', when set, overrides
    the detection on every platform except Windows (os.name == 'nt').
    """
    if os.name == 'nt':
        # sniff sys.version for architecture.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    # Set for cross builds explicitly
    if "_PYTHON_HOST_PLATFORM" in os.environ:
        return os.environ["_PYTHON_HOST_PLATFORM"]

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_')
    machine = machine.replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
            # We can't use "platform.architecture()[0]" because of a
            # bootstrap problem.  We use a dict so that an unexpected
            # word size raises KeyError instead of passing silently.
            # NOTE(review): sys.maxsize replaces the Python-2-only
            # sys.maxint; assumed to have the same value on Solaris builds.
            bitness = {2147483647: "32bit", 9223372036854775807: "64bit"}
            machine += ".%s" % bitness[sys.maxsize]
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        rel_re = re.compile(r'[\d.]+')
        m = rel_re.match(release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        import _osx_support, distutils.sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
                                        distutils.sysconfig.get_config_vars(),
                                        osname, release, machine)

    return "%s-%s-%s" % (osname, release, machine)


def convert_path(pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.  An empty 'pathname' is returned unchanged.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop every '.' component; nothing left means "the current directory".
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    # On Windows, if paths is ['C:','folder','subfolder'] then
    # os.path.join(*paths) will return 'C:folder\subfolder' which
    # is thus relative to the CWD on that drive. So we work around
    # this by adding a \ to path[0]
    if (paths[0].endswith(':') and
            sys.platform == "win32" and "GCC" in sys.version):
        paths[0] += '\\'
    return os.path.join(*paths)


def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError when 'os.name' is not one of 'posix',
    'nt' or 'os2'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            return os.path.join(new_root, pathname[1:])

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # startswith() rather than path[0]: 'path' is empty when
        # 'pathname' is just a drive (or empty), and must not IndexError.
        if path.startswith('\\'):
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        if path.startswith(os.sep):
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError(
            "nothing known about platform '%s'" % os.name)


_environ_checked = 0
def check_environ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    The work is done at most once per process; later calls return
    immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        import pwd
        os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1


def subst_vars(s, local_vars):
    """Perform shell/Perl-style variable substitution on the string 's'.
    Every occurrence of '$' followed by a name is considered a variable, and
    the variable is substituted by the value found in the 'local_vars'
    dictionary, or in 'os.environ' if it's not in 'local_vars'.
    'os.environ' is first checked/augmented to guarantee that it contains
    certain values: see 'check_environ()'.  Raise ValueError for any
    variables not found in either 'local_vars' or 'os.environ'.
    """
    check_environ()

    def _subst(match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        else:
            return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as var:
        # Interpolate the variable name itself: formatting the KeyError
        # object would wrap the name in a second pair of quotes.
        raise ValueError("invalid variable '$%s'" % var.args[0])


def grok_environment_error(exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    else:
        # Fall back to the last constructor argument of the exception.
        error = prefix + str(exc.args[-1])

    return error


# Needed by 'split_quoted()'
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the regular expressions used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.  Raises ValueError if a quote is left unterminated.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    if _wordchars_re is None: _init_regex()

    s = s.strip()
    words = []
    pos = 0

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % s[end])

            (beg, end) = m.span()
            # Splice the quoted text back in without its two quote chars;
            # scanning resumes just past it (hence the "- 2").
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words


def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg.  by
    writing to the filesystem).  Such actions are special because they
    are disabled by the 'dry_run' flag.  This method takes care of all
    that bureaucracy for you; all you have to do is supply the
    function to call and an argument tuple for it (to embody the
    "external action" being performed), and an optional message to
    print.

    'verbose' is accepted for backward compatibility but is not used
    here; the message is always sent to 'log.info()'.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg[-2:] == ',)':        # correct for singleton tuple
            msg = msg[0:-2] + ')'

    log.info(msg)
    if not dry_run:
        func(*args)


def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  The comparison ignores
    case.  Raises ValueError if 'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))


def byte_compile(py_files,
                 optimize=0, force=0,
                 prefix=None, base_dir=None,
                 verbose=1, dry_run=0,
                 direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is
    true, and ValueError if a filename does not start with 'prefix'.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        try:
            from tempfile import mkstemp
            (script_fd, script_name) = mkstemp(".py")
        except ImportError:
            from tempfile import mktemp
            (script_fd, script_name) = None, mktemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if not dry_run:
            if script_fd is not None:
                script = os.fdopen(script_fd, "w")
            else:
                script = open(script_name, "w")

            # 'with' guarantees the script is closed even if a write fails.
            with script:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face of
                # chdir'ing before running it).  But this requires abspath'ing
                # 'prefix' as well, and that breaks the hack in build_lib's
                # 'byte_compile()' method that carefully tacks on a trailing
                # slash (os.sep really) to make sure the prefix here is "just
                # right".  This whole prefix business is rather delicate -- the
                # problem is that it's really a directory, but I'm treating it
                # as a dumb string, so trailing slashes and so forth matter.

                #py_files = map(os.path.abspath, py_files)
                #if prefix:
                #    prefix = os.path.abspath(prefix)

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))
        elif script_fd is not None:
            # mkstemp() has already created the file on disk.  A dry run
            # must not leave it (or its open descriptor) behind.
            os.close(script_fd)
            os.remove(script_name)

        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            # Remove the temporary script even when the child process fails.
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for py_file in py_files:
            if py_file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'py_file'
            #           by default)
            cfile = py_file + (__debug__ and "c" or "o")
            dfile = py_file
            if prefix:
                if py_file[:len(prefix)] != prefix:
                    raise ValueError(
                        "invalid prefix: filename %r doesn't start with %r"
                        % (py_file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(py_file, cfile):
                log.info("byte-compiling %s to %s", py_file, cfile_base)
                if not dry_run:
                    compile(py_file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          py_file, cfile_base)


def rfc822_escape(header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    lines = header.split('\n')
    header = ('\n' + 8*' ').join(lines)
    return header