1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7__revision__ = "$Id$"
8
9import sys, os, string, re
10from distutils.errors import DistutilsPlatformError
11from distutils.dep_util import newer
12from distutils.spawn import spawn
13from distutils import log
14from distutils.errors import DistutilsByteCompileError
15
def get_platform ():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.

    Raises ValueError on Mac OS X if the '-arch' flags found in CFLAGS name
    a combination of architectures for which no machine name is known.
    """
    if os.name == 'nt':
        # Sniff sys.version for the architecture: the text between
        # " bit (" and the following ")" is what we look at.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        # Keep only the leading run of digits and dots of the release.
        m = re.match(r'[\d.]+', release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        #
        # For our purposes, we'll assume that the system version from
        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
        # to. This makes the compatibility story a bit more sane because the
        # machine is going to compile and link as if it were
        # MACOSX_DEPLOYMENT_TARGET.
        from distutils.sysconfig import get_config_vars
        cfgvars = get_config_vars()

        macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')

        # Always calculate the release of the running machine,
        # needed to determine if we can build fat binaries or not.
        macrelease = macver
        # Get the system version. Reading this plist is a documented
        # way to get the system version (see the documentation for
        # the Gestalt Manager)
        try:
            f = open('/System/Library/CoreServices/SystemVersion.plist')
        except IOError:
            # We're on a plain darwin box, fall back to the default
            # behaviour.
            pass
        else:
            try:
                m = re.search(
                        r'<key>ProductUserVisibleVersion</key>\s*' +
                        r'<string>(.*?)</string>', f.read())
                if m is not None:
                    macrelease = '.'.join(m.group(1).split('.')[:2])
                # else: fall back to the default behaviour
            finally:
                f.close()

        if not macver:
            macver = macrelease

        if macver:
            release = macver
            osname = "macosx"

            # Compare the running release numerically: comparing the
            # strings would rank "10.10" below "10.4".
            try:
                macrelease_info = tuple(
                    [int(part) for part in macrelease.split('.')[:2]])
            except ValueError:
                # Unparseable release: treat it as too old for fat builds.
                macrelease_info = (10, 0)

            cflags = cfgvars.get('CFLAGS', '')

            if macrelease_info >= (10, 4) and '-arch' in cflags.strip():
                # The universal build will build fat binaries, but not on
                # systems before 10.4
                #
                # Try to detect 4-way universal builds, those have machine-type
                # 'universal' instead of 'fat'.

                machine = 'fat'

                archs = re.findall(r'-arch\s+(\S+)', cflags)
                archs = tuple(sorted(set(archs)))

                if len(archs) == 1:
                    machine = archs[0]
                elif archs == ('i386', 'ppc'):
                    machine = 'fat'
                elif archs == ('i386', 'x86_64'):
                    machine = 'intel'
                elif archs == ('i386', 'ppc', 'x86_64'):
                    machine = 'fat3'
                elif archs == ('ppc64', 'x86_64'):
                    machine = 'fat64'
                elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
                    machine = 'universal'
                else:
                    raise ValueError(
                       "Don't know machine value for archs=%r"%(archs,))

            elif machine == 'i386':
                # On OSX the machine type returned by uname is always the
                # 32-bit variant, even if the executable architecture is
                # the 64-bit variant
                if sys.maxsize >= 2**32:
                    machine = 'x86_64'

            elif machine in ('PowerPC', 'Power_Macintosh'):
                # Pick a sane name for the PPC architecture.
                machine = 'ppc'

                # See 'i386' case
                if sys.maxsize >= 2**32:
                    machine = 'ppc64'

    return "%s-%s-%s" % (osname, release, machine)

# get_platform ()
183
184
def convert_path (pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.

    When the native separator already is '/', or 'pathname' is empty,
    'pathname' is returned unchanged.  Otherwise '.' components are
    dropped, and 'os.curdir' is returned if nothing else remains.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop every '.' component in a single pass.
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    return os.path.join(*paths)

# convert_path ()
211
212
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    On 'nt' and 'os2' the drive part of 'pathname' is discarded before
    joining.  Raises DistutilsPlatformError if 'os.name' is not one of
    'posix', 'nt' or 'os2'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        # Strip the leading '/' so that join() does not discard 'new_root'.
        return os.path.join(new_root, pathname[1:])

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice rather than index: 'path' is empty for "" or a bare drive.
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice rather than index: 'path' is empty for "" or a bare drive.
        if path[:1] == os.sep:
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError(
              "nothing known about platform '%s'" % os.name)
240
241
_environ_checked = 0
def check_environ ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    The work is done only once per process; later calls return
    immediately.  HOME is left unset if the current user id has no
    entry in the password database.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        try:
            import pwd
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # getpwuid() raises KeyError when the uid has no passwd
            # entry; a missing HOME is better than a crash here.
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1
263
264
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on the string 's'.
    Every occurrence of '$' followed by a name is considered a variable,
    and the variable is substituted by the value found in the 'local_vars'
    dictionary (converted with 'str()'), or in 'os.environ' if it's not in
    'local_vars'.  'os.environ' is first checked/augmented to guarantee
    that it contains certain values: see 'check_environ()'.  Raise
    ValueError for any variables not found in either 'local_vars' or
    'os.environ'.
    """
    check_environ()

    def _subst (match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as var:
        # Report the bare variable name: formatting the KeyError object
        # itself would wrap the name in an extra pair of quotes.
        raise ValueError("invalid variable '$%s'" % var.args[0])

# subst_vars ()
288
289
def grok_environment_error (exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
    does what it can to deal with exception objects that don't have a
    filename (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  Returns the error message as a string
    prefixed with 'prefix'.

    For objects without both 'filename' and 'strerror' attributes, the
    last element of 'exc.args' is used, or 'str(exc)' if 'args' is empty.
    """
    # check for Python 1.5.2-style {IO,OS}Error exception objects
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    elif exc.args:
        # Go through 'args' explicitly: subscripting the exception
        # object itself is not supported by every Python version.
        error = prefix + str(exc.args[-1])
    else:
        error = prefix + str(exc)

    return error
310
311
# Needed by 'split_quoted()'; compiled lazily on first use.
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the regular expressions used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    # A run of characters that are not a backslash, a quote or whitespace.
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # A complete single- or double-quoted string, allowing \-escapes inside.
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.  Raises ValueError if a quoted string is not terminated.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    if _wordchars_re is None: _init_regex()

    s = s.strip()
    words = []
    # 's' always starts with the word being built; 'pos' is how far into
    # it we have already scanned.
    pos = 0

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                      "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                      "bad string (mismatched %s quotes?)" % s[end])

            # Drop the two quote characters but keep what they enclosed;
            # scanning resumes right after the (now unquoted) text.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words

# split_quoted ()
381
382
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Run an action that has an effect on the outside world (such as
    writing to the filesystem), honouring the 'dry_run' flag.

    'func' is the callable to invoke and 'args' the tuple of positional
    arguments to pass to it.  'msg' is logged at info level before the
    call; when it is None a message is built from the function name and
    the argument tuple.  If 'dry_run' is true the message is still logged
    but 'func' is not called.  'verbose' is accepted but not used here.
    """
    if msg is None:
        description = "%s%r" % (func.__name__, args)
        # The repr of a one-element tuple ends in ",)": tidy that up.
        if description[-2:] == ',)':
            description = description[:-2] + ')'
        msg = description

    log.info(msg)
    if dry_run:
        return
    func(*args)
400
401
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  The comparison is
    case-insensitive.  Raises ValueError if 'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))
416
417
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out whether to use direct compilation or not
    (see the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if 'prefix' is given and a file name doesn't start
    with it.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if not dry_run:
            script = os.fdopen(script_fd, "w")
            try:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face
                # of chdir'ing before running it).  But this requires
                # abspath'ing 'prefix' as well, and that breaks the hack in
                # build_lib's 'byte_compile()' method that carefully tacks
                # on a trailing slash (os.sep really) to make sure the
                # prefix here is "just right".  This whole prefix business
                # is rather delicate -- the problem is that it's really a
                # directory, but I'm treating it as a dumb string, so
                # trailing slashes and so forth matter.

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))
            finally:
                # Close the script even if one of the writes failed.
                script.close()
        else:
            # mkstemp() has already created and opened the file; in a dry
            # run nothing else will clean it up, so do it right away.
            os.close(script_fd)
            os.remove(script_name)

        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            # Don't leave the temporary script behind if the child fails.
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for py_file in py_files:
            if py_file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'py_file' by
            #           default)
            cfile = py_file + ("c" if __debug__ else "o")
            dfile = py_file
            if prefix:
                if py_file[:len(prefix)] != prefix:
                    raise ValueError(
                          "invalid prefix: filename %r doesn't start with %r"
                          % (py_file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(py_file, cfile):
                log.info("byte-compiling %s to %s", py_file, cfile_base)
                if not dry_run:
                    compile(py_file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          py_file, cfile_base)

# byte_compile ()
560
def rfc822_escape (header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    lines = header.split('\n')
    # Re-join so that every continuation line starts with 8 spaces.
    return ('\n' + 8*' ').join(lines)
568