Home | History | Annotate | Download | only in native
      1 /*
      2  * Copyright (c) 1995, 2008, Oracle and/or its affiliates. All rights reserved.
      3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      4  *
      5  * This code is free software; you can redistribute it and/or modify it
      6  * under the terms of the GNU General Public License version 2 only, as
      7  * published by the Free Software Foundation.  Oracle designates this
      8  * particular file as subject to the "Classpath" exception as provided
      9  * by Oracle in the LICENSE file that accompanied this code.
     10  *
     11  * This code is distributed in the hope that it will be useful, but WITHOUT
     12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  * version 2 for more details (a copy is included in the LICENSE file that
     15  * accompanied this code).
     16  *
     17  * You should have received a copy of the GNU General Public License version
     18  * 2 along with this work; if not, write to the Free Software Foundation,
     19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     20  *
     21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     22  * or visit www.oracle.com if you need additional information or have any
     23  * questions.
     24  */
     25 
     26 #undef  _LARGEFILE64_SOURCE
     27 #define _LARGEFILE64_SOURCE 1
     28 
     29 #include "jni.h"
     30 #include "jvm.h"
     31 #include "jvm_md.h"
     32 #include "jni_util.h"
     33 #include "io_util.h"
     34 #include <nativehelper/JNIHelp.h>
     35 
     36 #define NATIVE_METHOD(className, functionName, signature) \
     37 { #functionName, signature, (void*)(className ## _ ## functionName) }
     38 
     39 /*
     40  * Platform-specific support for java.lang.Process
     41  */
     42 #include <assert.h>
     43 #include <stddef.h>
     44 #include <stdlib.h>
     45 #include <sys/types.h>
     46 #include <ctype.h>
     47 #ifdef _ALLBSD_SOURCE
     48 #include <wait.h>
     49 #else
     50 #include <sys/wait.h>
     51 #endif
     52 #include <signal.h>
     53 #include <string.h>
     54 #include <errno.h>
     55 #include <dirent.h>
     56 #include <unistd.h>
     57 #include <fcntl.h>
     58 #include <limits.h>
     59 
     60 #ifdef __APPLE__
     61 #include <crt_externs.h>
     62 #define environ (*_NSGetEnviron())
     63 #endif
     64 
     65 /*
     66  * There are 3 possible strategies we might use to "fork":
     67  *
     68  * - fork(2).  Very portable and reliable but subject to
     69  *   failure due to overcommit (see the documentation on
     70  *   /proc/sys/vm/overcommit_memory in Linux proc(5)).
     71  *   This is the ancient problem of spurious failure whenever a large
     72  *   process starts a small subprocess.
     73  *
     74  * - vfork().  Using this is scary because all relevant man pages
     75  *   contain dire warnings, e.g. Linux vfork(2).  But at least it's
     76  *   documented in the glibc docs and is standardized by XPG4.
     77  *   http://www.opengroup.org/onlinepubs/000095399/functions/vfork.html
     78  *   On Linux, one might think that vfork() would be implemented using
     79  *   the clone system call with flag CLONE_VFORK, but in fact vfork is
     80  *   a separate system call (which is a good sign, suggesting that
     81  *   vfork will continue to be supported at least on Linux).
     82  *   Another good sign is that glibc implements posix_spawn using
     83  *   vfork whenever possible.  Note that we cannot use posix_spawn
     84  *   ourselves because there's no reliable way to close all inherited
     85  *   file descriptors.
     86  *
     87  * - clone() with flags CLONE_VM but not CLONE_THREAD.  clone() is
     88  *   Linux-specific, but this ought to work - at least the glibc
     89  *   sources contain code to handle different combinations of CLONE_VM
     90  *   and CLONE_THREAD.  However, when this was implemented, it
     91  *   appeared to fail on 32-bit i386 (but not 64-bit x86_64) Linux with
     92  *   the simple program
     93  *     Runtime.getRuntime().exec("/bin/true").waitFor();
     94  *   with:
     95  *     #  Internal Error (os_linux_x86.cpp:683), pid=19940, tid=2934639536
     96  *     #  Error: pthread_getattr_np failed with errno = 3 (ESRCH)
     97  *   We believe this is a glibc bug, reported here:
     98  *     http://sources.redhat.com/bugzilla/show_bug.cgi?id=10311
     99  *   but the glibc maintainers closed it as WONTFIX.
    100  *
    101  * Based on the above analysis, we are currently using vfork() on
    102  * Linux and fork() on other Unix systems, but the code to use clone()
    103  * remains.
    104  */
    105 
    106 #define START_CHILD_USE_CLONE 0  /* clone() currently disabled; see above. */
    107 
    108 #ifndef START_CHILD_USE_CLONE
    109   #ifdef __linux__
    110     #define START_CHILD_USE_CLONE 1
    111   #else
    112     #define START_CHILD_USE_CLONE 0
    113   #endif
    114 #endif
    115 
    116 /* By default, use vfork() on Linux. */
    117 #ifndef START_CHILD_USE_VFORK
    118 // Android-changed: disable vfork under AddressSanitizer.
    119 //  #ifdef __linux__
    120   #if defined(__linux__) && !__has_feature(address_sanitizer)
    121     #define START_CHILD_USE_VFORK 1
    122   #else
    123     #define START_CHILD_USE_VFORK 0
    124   #endif
    125 #endif
    126 
    127 #if START_CHILD_USE_CLONE
    128 #include <sched.h>
    129 #define START_CHILD_SYSTEM_CALL "clone"
    130 #elif START_CHILD_USE_VFORK
    131 #define START_CHILD_SYSTEM_CALL "vfork"
    132 #else
    133 #define START_CHILD_SYSTEM_CALL "fork"
    134 #endif
    135 
    136 #ifndef STDIN_FILENO
    137 #define STDIN_FILENO 0
    138 #endif
    139 
    140 #ifndef STDOUT_FILENO
    141 #define STDOUT_FILENO 1
    142 #endif
    143 
    144 #ifndef STDERR_FILENO
    145 #define STDERR_FILENO 2
    146 #endif
    147 
    148 #ifndef SA_NOCLDSTOP
    149 #define SA_NOCLDSTOP 0
    150 #endif
    151 
    152 #ifndef SA_RESTART
    153 #define SA_RESTART 0
    154 #endif
    155 
    156 #define FAIL_FILENO (STDERR_FILENO + 1)
    157 
    158 /* TODO: Refactor. */
    159 #define RESTARTABLE(_cmd, _result) do { \
    160   do { \
    161     (_result) = _cmd; \
    162   } while(((_result) == -1) && (errno == EINTR)); \
    163 } while(0)
    164 
    165 /* This is one of the rare times it's more portable to declare an
    166  * external symbol explicitly, rather than via a system header.
    167  * The declaration is standardized as part of UNIX98, but there is
    168  * no standard (not even de-facto) header file where the
    169  * declaration is to be found.  See:
    170  * http://www.opengroup.org/onlinepubs/009695399/functions/environ.html
    171  * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_02.html
    172  *
    173  * "All identifiers in this volume of IEEE Std 1003.1-2001, except
    174  * environ, are defined in at least one of the headers" (!)
    175  */
    176 extern char **environ;
    177 
    178 
    179 static void
    180 setSIGCHLDHandler(JNIEnv *env)
    181 {
    182     /* There is a subtle difference between having the signal handler
    183      * for SIGCHLD be SIG_DFL and SIG_IGN.  We cannot obtain process
    184      * termination information for child processes if the signal
    185      * handler is SIG_IGN.  It must be SIG_DFL.
    186      *
    187      * We used to set the SIGCHLD handler only on Linux, but it's
    188      * safest to set it unconditionally.
    189      *
    190      * Consider what happens if java's parent process sets the SIGCHLD
    191      * handler to SIG_IGN.  Normally signal handlers are inherited by
    192      * children, but SIGCHLD is a controversial case.  Solaris appears
    193      * to always reset it to SIG_DFL, but this behavior may be
    194      * non-standard-compliant, and we shouldn't rely on it.
    195      *
    196      * References:
    197      * http://www.opengroup.org/onlinepubs/7908799/xsh/exec.html
    198      * http://www.pasc.org/interps/unofficial/db/p1003.1/pasc-1003.1-132.html
    199      */
    200     struct sigaction sa;
    201     sa.sa_handler = SIG_DFL;
    202     sigemptyset(&sa.sa_mask);
    203     sa.sa_flags = SA_NOCLDSTOP | SA_RESTART;
    204     if (sigaction(SIGCHLD, &sa, NULL) < 0)
    205         JNU_ThrowInternalError(env, "Can't set SIGCHLD handler");
    206 }
    207 
    208 static void*
    209 xmalloc(JNIEnv *env, size_t size)
    210 {
    211     void *p = malloc(size);
    212     if (p == NULL)
    213         JNU_ThrowOutOfMemoryError(env, NULL);
    214     return p;
    215 }
    216 
    217 #define NEW(type, n) ((type *) xmalloc(env, (n) * sizeof(type)))
    218 
    219 /**
    220  * If PATH is not defined, the OS provides some default value.
    221  * Unfortunately, there's no portable way to get this value.
    222  * Fortunately, it's only needed if the child has PATH while we do not.
    223  */
    224 static const char*
    225 defaultPath(void)
    226 {
    227 #ifdef __solaris__
    228     /* These really are the Solaris defaults! */
    229     return (geteuid() == 0 || getuid() == 0) ?
    230         "/usr/xpg4/bin:/usr/ccs/bin:/usr/bin:/opt/SUNWspro/bin:/usr/sbin" :
    231         "/usr/xpg4/bin:/usr/ccs/bin:/usr/bin:/opt/SUNWspro/bin:";
    232 #else
    233     return ":/bin:/usr/bin";    /* glibc */
    234 #endif
    235 }
    236 
    237 static const char*
    238 effectivePath(void)
    239 {
    240     const char *s = getenv("PATH");
    241     return (s != NULL) ? s : defaultPath();
    242 }
    243 
    244 static int
    245 countOccurrences(const char *s, char c)
    246 {
    247     int count;
    248     for (count = 0; *s != '\0'; s++)
    249         count += (*s == c);
    250     return count;
    251 }
    252 
    253 static const char * const *
    254 splitPath(JNIEnv *env, const char *path)
    255 {
    256     const char *p, *q;
    257     char **pathv;
    258     int i;
    259     int count = countOccurrences(path, ':') + 1;
    260 
    261     pathv = NEW(char*, count+1);
    262     pathv[count] = NULL;
    263     for (p = path, i = 0; i < count; i++, p = q + 1) {
    264         for (q = p; (*q != ':') && (*q != '\0'); q++)
    265             ;
    266         if (q == p)             /* empty PATH component => "." */
    267             pathv[i] = "./";
    268         else {
    269             int addSlash = ((*(q - 1)) != '/');
    270             pathv[i] = NEW(char, q - p + addSlash + 1);
    271             memcpy(pathv[i], p, q - p);
    272             if (addSlash)
    273                 pathv[i][q - p] = '/';
    274             pathv[i][q - p + addSlash] = '\0';
    275         }
    276     }
    277     return (const char * const *) pathv;
    278 }
    279 
    280 /**
    281  * Cached value of JVM's effective PATH.
    282  * (We don't support putenv("PATH=...") in native code)
    283  */
    284 static const char *parentPath;
    285 
    286 /**
    287  * Split, canonicalized version of parentPath
    288  */
    289 static const char * const *parentPathv;
    290 
    291 static jfieldID field_exitcode;
    292 
    293 JNIEXPORT void JNICALL
    294 UNIXProcess_initIDs(JNIEnv *env, jclass clazz)
    295 {
    296     field_exitcode = (*env)->GetFieldID(env, clazz, "exitcode", "I");
    297 
    298     parentPath  = effectivePath();
    299     parentPathv = splitPath(env, parentPath);
    300 
    301     setSIGCHLDHandler(env);
    302 }
    303 
    304 
    305 #ifndef WIFEXITED
    306 #define WIFEXITED(status) (((status)&0xFF) == 0)
    307 #endif
    308 
    309 #ifndef WEXITSTATUS
    310 #define WEXITSTATUS(status) (((status)>>8)&0xFF)
    311 #endif
    312 
    313 #ifndef WIFSIGNALED
    314 #define WIFSIGNALED(status) (((status)&0xFF) > 0 && ((status)&0xFF00) == 0)
    315 #endif
    316 
    317 #ifndef WTERMSIG
    318 #define WTERMSIG(status) ((status)&0x7F)
    319 #endif
    320 
    321 /* Block until a child process exits and return its exit code.
    322    Note, can only be called once for any given pid. */
    323 JNIEXPORT jint JNICALL
    324 UNIXProcess_waitForProcessExit(JNIEnv* env,
    325                                               jobject junk,
    326                                               jint pid)
    327 {
    328     /* We used to use waitid() on Solaris, waitpid() on Linux, but
    329      * waitpid() is more standard, so use it on all POSIX platforms. */
    330     int status;
    331     /* Wait for the child process to exit.  This returns immediately if
    332        the child has already exited. */
    333     while (waitpid(pid, &status, 0) < 0) {
    334         switch (errno) {
    335         case ECHILD: return 0;
    336         case EINTR: break;
    337         default: return -1;
    338         }
    339     }
    340 
    341     if (WIFEXITED(status)) {
    342         /*
    343          * The child exited normally; get its exit code.
    344          */
    345         return WEXITSTATUS(status);
    346     } else if (WIFSIGNALED(status)) {
    347         /* The child exited because of a signal.
    348          * The best value to return is 0x80 + signal number,
    349          * because that is what all Unix shells do, and because
    350          * it allows callers to distinguish between process exit and
    351          * process death by signal.
    352          * Unfortunately, the historical behavior on Solaris is to return
    353          * the signal number, and we preserve this for compatibility. */
    354 #ifdef __solaris__
    355         return WTERMSIG(status);
    356 #else
    357         return 0x80 + WTERMSIG(status);
    358 #endif
    359     } else {
    360         /*
    361          * Unknown exit code; pass it through.
    362          */
    363         return status;
    364     }
    365 }
    366 
    367 static ssize_t
    368 restartableWrite(int fd, const void *buf, size_t count)
    369 {
    370     ssize_t result;
    371     RESTARTABLE(write(fd, buf, count), result);
    372     return result;
    373 }
    374 
    375 static int
    376 restartableDup2(int fd_from, int fd_to)
    377 {
    378     int err;
    379     RESTARTABLE(dup2(fd_from, fd_to), err);
    380     return err;
    381 }
    382 
    383 static int
    384 restartableClose(int fd)
    385 {
    386     int err;
    387     RESTARTABLE(close(fd), err);
    388     return err;
    389 }
    390 
    391 static int
    392 closeSafely(int fd)
    393 {
    394     return (fd == -1) ? 0 : restartableClose(fd);
    395 }
    396 
    397 static int
    398 isAsciiDigit(char c)
    399 {
    400   return c >= '0' && c <= '9';
    401 }
    402 
    403 #ifdef _ALLBSD_SOURCE
    404 #define FD_DIR "/dev/fd"
    405 #define dirent64 dirent
    406 #define readdir64 readdir
    407 #else
    408 #define FD_DIR "/proc/self/fd"
    409 #endif
    410 
    411 static int
    412 closeDescriptors(void)
    413 {
    414     DIR *dp;
    415     struct dirent64 *dirp;
    416     int from_fd = FAIL_FILENO + 1;
    417 
    418     /* We're trying to close all file descriptors, but opendir() might
    419      * itself be implemented using a file descriptor, and we certainly
    420      * don't want to close that while it's in use.  We assume that if
    421      * opendir() is implemented using a file descriptor, then it uses
    422      * the lowest numbered file descriptor, just like open().  So we
    423      * close a couple explicitly.  */
    424 
    425     restartableClose(from_fd);          /* for possible use by opendir() */
    426     restartableClose(from_fd + 1);      /* another one for good luck */
    427 
    428     if ((dp = opendir(FD_DIR)) == NULL)
    429         return 0;
    430 
    431     /* We use readdir64 instead of readdir to work around Solaris bug
    432      * 6395699: /proc/self/fd fails to report file descriptors >= 1024 on Solaris 9
    433      */
    434     while ((dirp = readdir64(dp)) != NULL) {
    435         int fd;
    436         if (isAsciiDigit(dirp->d_name[0]) &&
    437             (fd = strtol(dirp->d_name, NULL, 10)) >= from_fd + 2)
    438             restartableClose(fd);
    439     }
    440 
    441     closedir(dp);
    442 
    443     return 1;
    444 }
    445 
    446 static int
    447 moveDescriptor(int fd_from, int fd_to)
    448 {
    449     if (fd_from != fd_to) {
    450         if ((restartableDup2(fd_from, fd_to) == -1) ||
    451             (restartableClose(fd_from) == -1))
    452             return -1;
    453     }
    454     return 0;
    455 }
    456 
    457 static const char *
    458 getBytes(JNIEnv *env, jbyteArray arr)
    459 {
    460     return arr == NULL ? NULL :
    461         (const char*) (*env)->GetByteArrayElements(env, arr, NULL);
    462 }
    463 
    464 static void
    465 releaseBytes(JNIEnv *env, jbyteArray arr, const char* parr)
    466 {
    467     if (parr != NULL)
    468         (*env)->ReleaseByteArrayElements(env, arr, (jbyte*) parr, JNI_ABORT);
    469 }
    470 
    471 static void
    472 initVectorFromBlock(const char**vector, const char* block, int count)
    473 {
    474     int i;
    475     const char *p;
    476     for (i = 0, p = block; i < count; i++) {
    477         /* Invariant: p always points to the start of a C string. */
    478         vector[i] = p;
    479         while (*(p++));
    480     }
    481     vector[count] = NULL;
    482 }
    483 
    484 static void
    485 throwIOException(JNIEnv *env, int errnum, const char *defaultDetail)
    486 {
    487     static const char * const format = "error=%d, %s";
    488     const char *detail = defaultDetail;
    489     char *errmsg;
    490     jstring s;
    491 
    492     if (errnum != 0) {
    493         const char *s = strerror(errnum);
    494         if (strcmp(s, "Unknown error") != 0)
    495             detail = s;
    496     }
    497     /* ASCII Decimal representation uses 2.4 times as many bits as binary. */
    498     size_t newsize = strlen(format) + strlen(detail) + 3 * sizeof(errnum);
    499     errmsg = NEW(char, newsize);
    500     snprintf(errmsg, newsize, format, errnum, detail);
    501     s = JNU_NewStringPlatform(env, errmsg);
    502     if (s != NULL) {
    503         jobject x = JNU_NewObjectByName(env, "java/io/IOException",
    504                                         "(Ljava/lang/String;)V", s);
    505         if (x != NULL)
    506             (*env)->Throw(env, x);
    507     }
    508     free(errmsg);
    509 }
    510 
    511 #ifdef DEBUG_PROCESS
    512 /* Debugging process code is difficult; where to write debug output? */
    513 static void
    514 debugPrint(char *format, ...)
    515 {
    516     FILE *tty = fopen("/dev/tty", "w");
    517     va_list ap;
    518     va_start(ap, format);
    519     vfprintf(tty, format, ap);
    520     va_end(ap);
    521     fclose(tty);
    522 }
    523 #endif /* DEBUG_PROCESS */
    524 
    525 /**
    526  * Exec FILE as a traditional Bourne shell script (i.e. one without #!).
    527  * If we could do it over again, we would probably not support such an ancient
    528  * misfeature, but compatibility wins over sanity.  The original support for
    529  * this was imported accidentally from execvp().
    530  */
    531 // Android-added: #if START_CHILD_USE_CLONE || START_CHILD_USE_VFORK
    532 #if START_CHILD_USE_CLONE || START_CHILD_USE_VFORK
    533 static void
    534 execve_as_traditional_shell_script(const char *file,
    535                                    const char *argv[],
    536                                    const char *const envp[])
    537 {
    538     /* Use the extra word of space provided for us in argv by caller. */
    539     const char *argv0 = argv[0];
    540     const char *const *end = argv;
    541     while (*end != NULL)
    542         ++end;
    543     memmove(argv+2, argv+1, (end-argv) * sizeof (*end));
    544     argv[0] = "/bin/sh";
    545     argv[1] = file;
    546     execve(argv[0], (char **) argv, (char **) envp);
    547     /* Can't even exec /bin/sh?  Big trouble, but let's soldier on... */
    548     memmove(argv+1, argv+2, (end-argv) * sizeof (*end));
    549     argv[0] = argv0;
    550 }
    551 #endif
    552 
    553 /**
    554  * Like execve(2), except that in case of ENOEXEC, FILE is assumed to
    555  * be a shell script and the system default shell is invoked to run it.
    556  */
    557 static void
    558 execve_with_shell_fallback(const char *file,
    559                            const char *argv[],
    560                            const char *const envp[])
    561 {
    562 #if START_CHILD_USE_CLONE || START_CHILD_USE_VFORK
    563     /* shared address space; be very careful. */
    564     execve(file, (char **) argv, (char **) envp);
    565     if (errno == ENOEXEC)
    566         execve_as_traditional_shell_script(file, argv, envp);
    567 #else
    568     /* unshared address space; we can mutate environ. */
    569     environ = (char **) envp;
    570     execvp(file, (char **) argv);
    571 #endif
    572 }
    573 
    574 /**
    575  * 'execvpe' should have been included in the Unix standards,
    576  * and is a GNU extension in glibc 2.10.
    577  *
    578  * JDK_execvpe is identical to execvp, except that the child environment is
    579  * specified via the 3rd argument instead of being inherited from environ.
    580  */
    581 static void
    582 JDK_execvpe(const char *file,
    583             const char *argv[],
    584             const char *const envp[])
    585 {
    586     if (envp == NULL || (char **) envp == environ) {
    587         execvp(file, (char **) argv);
    588         return;
    589     }
    590 
    591     if (*file == '\0') {
    592         errno = ENOENT;
    593         return;
    594     }
    595 
    596     if (strchr(file, '/') != NULL) {
    597         execve_with_shell_fallback(file, argv, envp);
    598     } else {
    599         /* We must search PATH (parent's, not child's) */
    600         char expanded_file[PATH_MAX];
    601         int filelen = strlen(file);
    602         int sticky_errno = 0;
    603         const char * const * dirs;
    604         for (dirs = parentPathv; *dirs; dirs++) {
    605             const char * dir = *dirs;
    606             int dirlen = strlen(dir);
    607             if (filelen + dirlen + 1 >= PATH_MAX) {
    608                 errno = ENAMETOOLONG;
    609                 continue;
    610             }
    611             memcpy(expanded_file, dir, dirlen);
    612             memcpy(expanded_file + dirlen, file, filelen);
    613             expanded_file[dirlen + filelen] = '\0';
    614             execve_with_shell_fallback(expanded_file, argv, envp);
    615             /* There are 3 responses to various classes of errno:
    616              * return immediately, continue (especially for ENOENT),
    617              * or continue with "sticky" errno.
    618              *
    619              * From exec(3):
    620              *
    621              * If permission is denied for a file (the attempted
    622              * execve returned EACCES), these functions will continue
    623              * searching the rest of the search path.  If no other
    624              * file is found, however, they will return with the
    625              * global variable errno set to EACCES.
    626              */
    627             switch (errno) {
    628             case EACCES:
    629                 sticky_errno = errno;
    630                 /* FALLTHRU */
    631             case ENOENT:
    632             case ENOTDIR:
    633 #ifdef ELOOP
    634             case ELOOP:
    635 #endif
    636 #ifdef ESTALE
    637             case ESTALE:
    638 #endif
    639 #ifdef ENODEV
    640             case ENODEV:
    641 #endif
    642 #ifdef ETIMEDOUT
    643             case ETIMEDOUT:
    644 #endif
    645                 break; /* Try other directories in PATH */
    646             default:
    647                 return;
    648             }
    649         }
    650         if (sticky_errno != 0)
    651             errno = sticky_errno;
    652     }
    653 }
    654 
    655 /*
    656  * Reads nbyte bytes from file descriptor fd into buf,
    657  * The read operation is retried in case of EINTR or partial reads.
    658  *
    659  * Returns number of bytes read (normally nbyte, but may be less in
    660  * case of EOF).  In case of read errors, returns -1 and sets errno.
    661  */
    662 static ssize_t
    663 readFully(int fd, void *buf, size_t nbyte)
    664 {
    665     ssize_t remaining = nbyte;
    666     for (;;) {
    667         ssize_t n = read(fd, buf, remaining);
    668         if (n == 0) {
    669             return nbyte - remaining;
    670         } else if (n > 0) {
    671             remaining -= n;
    672             if (remaining <= 0)
    673                 return nbyte;
    674             /* We were interrupted in the middle of reading the bytes.
    675              * Unlikely, but possible. */
    676             buf = (void *) (((char *)buf) + n);
    677         } else if (errno == EINTR) {
    678             /* Strange signals like SIGJVM1 are possible at any time.
    679              * See http://www.dreamsongs.com/WorseIsBetter.html */
    680         } else {
    681             return -1;
    682         }
    683     }
    684 }
    685 
    686 typedef struct _ChildStuff
    687 {
    688     int in[2];
    689     int out[2];
    690     int err[2];
    691     int fail[2];
    692     int fds[3];
    693     const char **argv;
    694     const char **envv;
    695     const char *pdir;
    696     jboolean redirectErrorStream;
    697 #if START_CHILD_USE_CLONE
    698     void *clone_stack;
    699 #endif
    700 } ChildStuff;
    701 
    702 static void
    703 copyPipe(int from[2], int to[2])
    704 {
    705     to[0] = from[0];
    706     to[1] = from[1];
    707 }
    708 
    709 /**
    710  * Child process after a successful fork() or clone().
    711  * This function must not return, and must be prepared for either all
    712  * of its address space to be shared with its parent, or to be a copy.
    713  * It must not modify global variables such as "environ".
    714  */
    715 static int
    716 childProcess(void *arg)
    717 {
    718     const ChildStuff* p = (const ChildStuff*) arg;
    719 
    720     /* Close the parent sides of the pipes.
    721        Closing pipe fds here is redundant, since closeDescriptors()
    722        would do it anyways, but a little paranoia is a good thing. */
    723     if ((closeSafely(p->in[1])   == -1) ||
    724         (closeSafely(p->out[0])  == -1) ||
    725         (closeSafely(p->err[0])  == -1) ||
    726         (closeSafely(p->fail[0]) == -1))
    727         goto WhyCantJohnnyExec;
    728 
    729     /* Give the child sides of the pipes the right fileno's. */
    730     /* Note: it is possible for in[0] == 0 */
    731     if ((moveDescriptor(p->in[0] != -1 ?  p->in[0] : p->fds[0],
    732                         STDIN_FILENO) == -1) ||
    733         (moveDescriptor(p->out[1]!= -1 ? p->out[1] : p->fds[1],
    734                         STDOUT_FILENO) == -1))
    735         goto WhyCantJohnnyExec;
    736 
    737     if (p->redirectErrorStream) {
    738         if ((closeSafely(p->err[1]) == -1) ||
    739             (restartableDup2(STDOUT_FILENO, STDERR_FILENO) == -1))
    740             goto WhyCantJohnnyExec;
    741     } else {
    742         if (moveDescriptor(p->err[1] != -1 ? p->err[1] : p->fds[2],
    743                            STDERR_FILENO) == -1)
    744             goto WhyCantJohnnyExec;
    745     }
    746 
    747     if (moveDescriptor(p->fail[1], FAIL_FILENO) == -1)
    748         goto WhyCantJohnnyExec;
    749 
    750     /* close everything */
    751     if (closeDescriptors() == 0) { /* failed,  close the old way */
    752         int max_fd = (int)sysconf(_SC_OPEN_MAX);
    753         int fd;
    754         for (fd = FAIL_FILENO + 1; fd < max_fd; fd++)
    755             if (restartableClose(fd) == -1 && errno != EBADF)
    756                 goto WhyCantJohnnyExec;
    757     }
    758 
    759     /* change to the new working directory */
    760     if (p->pdir != NULL && chdir(p->pdir) < 0)
    761         goto WhyCantJohnnyExec;
    762 
    763     if (fcntl(FAIL_FILENO, F_SETFD, FD_CLOEXEC) == -1)
    764         goto WhyCantJohnnyExec;
    765 
    766     JDK_execvpe(p->argv[0], p->argv, p->envv);
    767 
    768  WhyCantJohnnyExec:
    769     /* We used to go to an awful lot of trouble to predict whether the
    770      * child would fail, but there is no reliable way to predict the
    771      * success of an operation without *trying* it, and there's no way
    772      * to try a chdir or exec in the parent.  Instead, all we need is a
    773      * way to communicate any failure back to the parent.  Easy; we just
    774      * send the errno back to the parent over a pipe in case of failure.
    775      * The tricky thing is, how do we communicate the *success* of exec?
    776      * We use FD_CLOEXEC together with the fact that a read() on a pipe
    777      * yields EOF when the write ends (we have two of them!) are closed.
    778      */
    779     {
    780         int errnum = errno;
    781         restartableWrite(FAIL_FILENO, &errnum, sizeof(errnum));
    782     }
    783     restartableClose(FAIL_FILENO);
    784     _exit(-1);
    785     return 0;  /* Suppress warning "no return value from function" */
    786 }
    787 
    788 /**
    789  * Start a child process running function childProcess.
    790  * This function only returns in the parent.
    791  * We are unusually paranoid; use of clone/vfork is
    792  * especially likely to tickle gcc/glibc bugs.
         *
         * The mechanism is selected at compile time: clone() when
         * START_CHILD_USE_CLONE is set, otherwise vfork() when
         * START_CHILD_USE_VFORK is set, otherwise fork().
         *
         * c is passed unchanged to childProcess. On the clone() path this
         * function stores a freshly malloc'ed stack in c->clone_stack and
         * does not free it; the caller has to release it after the call.
         *
         * Returns whatever clone()/vfork()/fork() returned in the parent,
         * or -1 if the clone stack could not be allocated.
    793  */
    794 #ifdef __attribute_noinline__  /* See: sys/cdefs.h */
    795 __attribute_noinline__
    796 #endif
    797 static pid_t
    798 startChild(ChildStuff *c) {
    799 #if START_CHILD_USE_CLONE
    800 #define START_CHILD_CLONE_STACK_SIZE (64 * 1024)
    801     /*
    802      * See clone(2).
    803      * Instead of worrying about which direction the stack grows, just
    804      * allocate twice as much and start the stack in the middle.
    805      */
    806     if ((c->clone_stack = malloc(2 * START_CHILD_CLONE_STACK_SIZE)) == NULL)
    807         /* errno will be set to ENOMEM */
    808         return -1;
            /*
             * Per clone(2): CLONE_VM makes the child share the parent's memory,
             * CLONE_VFORK suspends the parent until the child execs or exits,
             * and SIGCHLD is the signal delivered to the parent on child exit.
             */
    809     return clone(childProcess,
    810                  c->clone_stack + START_CHILD_CLONE_STACK_SIZE,
    811                  CLONE_VFORK | CLONE_VM | SIGCHLD, c);
    812 #else
    813   #if START_CHILD_USE_VFORK
    814     /*
    815      * We separate the call to vfork into a separate function to make
    816      * very sure to keep stack of child from corrupting stack of parent,
    817      * as suggested by the scary gcc warning:
    818      *  warning: variable 'foo' might be clobbered by 'longjmp' or 'vfork'
    819      */
    820     volatile pid_t resultPid = vfork();
    821   #else
    822     /*
    823      * From Solaris fork(2): In Solaris 10, a call to fork() is
    824      * identical to a call to fork1(); only the calling thread is
    825      * replicated in the child process. This is the POSIX-specified
    826      * behavior for fork().
    827      */
    828     pid_t resultPid = fork();
    829   #endif
            /* A result of 0 means we are executing in the child. */
    830     if (resultPid == 0)
    831         childProcess(c);
    832     assert(resultPid != 0);  /* childProcess never returns */
    833     return resultPid;
    834 #endif /* ! START_CHILD_USE_CLONE */
    835 }
    836 
        /*
         * Native side of UNIXProcess.forkAndExec: builds the child's argv,
         * environment and pipes, starts the child with startChild, and then
         * waits on the "fail" pipe to learn whether the exec worked.
         *
         * prog, argBlock, argc   Program bytes and argument block; turned into
         *                        c->argv with prog as argv[0]. Must not be NULL
         *                        (asserted).
         * envBlock, envc         Environment block; may be NULL, in which case
         *                        c->envv stays NULL.
         * dir                    Working directory bytes; may be NULL, in which
         *                        case c->pdir stays NULL.
         * std_fds                Int array read at indices 0..2; must not be
         *                        NULL (asserted). An entry of -1 requests a new
         *                        pipe for that stream. On success the three
         *                        entries are overwritten with the parent's end
         *                        of each pipe created here, or -1 where no pipe
         *                        was created.
         * redirectErrorStream    Copied into the ChildStuff for the child.
         *
         * Returns -1 if the child was never started, otherwise the pid
         * returned by startChild. Pipe, start, exec and read failures are
         * reported through throwIOException; when the failure is detected
         * after the child was started ("Exec failed", "Read failed") the
         * child's pid is still the return value.
         */
    837 JNIEXPORT jint JNICALL
    838 UNIXProcess_forkAndExec(JNIEnv *env,
    839                                        jobject process,
    840                                        jbyteArray prog,
    841                                        jbyteArray argBlock, jint argc,
    842                                        jbyteArray envBlock, jint envc,
    843                                        jbyteArray dir,
    844                                        jintArray std_fds,
    845                                        jboolean redirectErrorStream)
    846 {
    847     int errnum;
    848     int resultPid = -1;
    849     int in[2], out[2], err[2], fail[2];
    850     jint *fds = NULL;
    851     const char *pprog = NULL;
    852     const char *pargBlock = NULL;
    853     const char *penvBlock = NULL;
    854     ChildStuff *c;
    855 
            /* -1 marks "no descriptor", so the cleanup code can run on any path. */
    856     in[0] = in[1] = out[0] = out[1] = err[0] = err[1] = fail[0] = fail[1] = -1;
    857 
            /* Nothing else has been acquired yet, so a plain return is enough. */
    858     if ((c = NEW(ChildStuff, 1)) == NULL) return -1;
    859     c->argv = NULL;
    860     c->envv = NULL;
    861     c->pdir = NULL;
    862 #if START_CHILD_USE_CLONE
    863     c->clone_stack = NULL;
    864 #endif
    865 
    866     /* Convert prog + argBlock into a char ** argv.
    867      * Add one word room for expansion of argv for use by
    868      * execve_as_traditional_shell_script.
    869      */
    870     assert(prog != NULL && argBlock != NULL);
    871     if ((pprog     = getBytes(env, prog))       == NULL) goto Catch;
    872     if ((pargBlock = getBytes(env, argBlock))   == NULL) goto Catch;
            /*
             * argc + 3 slots: argv[0], the argc arguments, the spare word
             * mentioned above, and presumably a terminating NULL written by
             * initVectorFromBlock -- TODO confirm against its definition.
             */
    873     if ((c->argv = NEW(const char *, argc + 3)) == NULL) goto Catch;
    874     c->argv[0] = pprog;
    875     initVectorFromBlock(c->argv+1, pargBlock, argc);
    876 
    877     if (envBlock != NULL) {
    878         /* Convert envBlock into a char ** envv */
    879         if ((penvBlock = getBytes(env, envBlock))   == NULL) goto Catch;
    880         if ((c->envv = NEW(const char *, envc + 1)) == NULL) goto Catch;
    881         initVectorFromBlock(c->envv, penvBlock, envc);
    882     }
    883 
    884     if (dir != NULL) {
    885         if ((c->pdir = getBytes(env, dir)) == NULL) goto Catch;
    886     }
    887 
    888     assert(std_fds != NULL);
    889     fds = (*env)->GetIntArrayElements(env, std_fds, NULL);
    890     if (fds == NULL) goto Catch;
    891 
            /*
             * Create a pipe for every standard stream whose requested fd is -1,
             * plus the fail pipe, which is always needed. The || chain stops at
             * the first pipe() call that fails; pipes created before that point
             * are closed by the Catch/Finally code below.
             */
    892     if ((fds[0] == -1 && pipe(in)  < 0) ||
    893         (fds[1] == -1 && pipe(out) < 0) ||
    894         (fds[2] == -1 && pipe(err) < 0) ||
    895         (pipe(fail) < 0)) {
    896         throwIOException(env, errno, "Bad file descriptor");
    897         goto Catch;
    898     }
            /* Hand the requested fds and all pipe ends to the child via c. */
    899     c->fds[0] = fds[0];
    900     c->fds[1] = fds[1];
    901     c->fds[2] = fds[2];
    902 
    903     copyPipe(in,   c->in);
    904     copyPipe(out,  c->out);
    905     copyPipe(err,  c->err);
    906     copyPipe(fail, c->fail);
    907 
    908     c->redirectErrorStream = redirectErrorStream;
    909 
    910     resultPid = startChild(c);
    911     assert(resultPid != 0);
    912 
    913     if (resultPid < 0) {
    914         throwIOException(env, errno, START_CHILD_SYSTEM_CALL " failed");
    915         goto Catch;
    916     }
    917 
            /*
             * Drop the parent's copy of the fail pipe's write end before reading
             * from the read end; presumably this is what allows the read below to
             * see end-of-file once the child no longer holds the write end.
             */
    918     restartableClose(fail[1]); fail[1] = -1; /* See: WhyCantJohnnyExec */
    919 
            /*
             * Nothing read means the exec worked. Exactly sizeof(errnum) bytes
             * are taken to be the error number of the failed exec (presumably
             * written by childProcess); the child is reaped before throwing.
             * Any other result is treated as a failed read.
             */
    920     switch (readFully(fail[0], &errnum, sizeof(errnum))) {
    921     case 0: break; /* Exec succeeded */
    922     case sizeof(errnum):
    923         waitpid(resultPid, NULL, 0);
    924         throwIOException(env, errnum, "Exec failed");
    925         goto Catch;
    926     default:
    927         throwIOException(env, errno, "Read failed");
    928         goto Catch;
    929     }
    930 
            /*
             * Success: report the parent's ends of the pipes back through
             * std_fds. An entry becomes -1 when no pipe was created for that
             * stream, i.e. when the caller supplied its own fd.
             */
    931     fds[0] = (in [1] != -1) ? in [1] : -1;
    932     fds[1] = (out[0] != -1) ? out[0] : -1;
    933     fds[2] = (err[0] != -1) ? err[0] : -1;
    934 
            /* Common exit path; reached by fall-through and from Catch. */
    935  Finally:
    936 #if START_CHILD_USE_CLONE
    937     free(c->clone_stack);
    938 #endif
    939 
    940     /* Always clean up the child's side of the pipes */
    941     closeSafely(in [0]);
    942     closeSafely(out[1]);
    943     closeSafely(err[1]);
    944 
    945     /* Always clean up fail descriptors */
    946     closeSafely(fail[0]);
    947     closeSafely(fail[1]);
    948 
    949     releaseBytes(env, prog,     pprog);
    950     releaseBytes(env, argBlock, pargBlock);
    951     releaseBytes(env, envBlock, penvBlock);
    952     releaseBytes(env, dir,      c->pdir);
    953 
    954     free(c->argv);
    955     free(c->envv);
    956     free(c);
    957 
            /* Mode 0: per the JNI spec, copy the elements back and free the buffer. */
    958     if (fds != NULL)
    959         (*env)->ReleaseIntArrayElements(env, std_fds, fds, 0);
    960 
    961     return resultPid;
    962 
    963  Catch:
    964     /* Clean up the parent's side of the pipes in case of failure only */
    965     closeSafely(in [1]);
    966     closeSafely(out[0]);
    967     closeSafely(err[0]);
    968     goto Finally;
    969 }
    970 
        /*
         * Native side of UNIXProcess.destroyProcess: sends SIGTERM to pid.
         * env and junk are not used, and the result of kill() is ignored.
         *
         * NOTE(review): pid is handed to kill() unchecked. Per kill(2), a pid
         * of 0 or a negative pid addresses process groups (or, for -1, every
         * process the caller may signal) rather than one process -- confirm
         * that callers only pass pids obtained from forkAndExec.
         */
    971 JNIEXPORT void JNICALL
    972 UNIXProcess_destroyProcess(JNIEnv *env, jobject junk, jint pid)
    973 {
    974     kill(pid, SIGTERM);
    975 }
    976 
        /*
         * Native method table for UNIXProcess. Each NATIVE_METHOD entry
         * expands to { "<functionName>", <signature>,
         * (void*) UNIXProcess_<functionName> }. The signature strings are JNI
         * type descriptors; for example "([B[BI[BI[B[IZ)I" is
         * (byte[], byte[], int, byte[], int, byte[], int[], boolean) -> int,
         * which matches the parameter list of UNIXProcess_forkAndExec.
         */
    977 static JNINativeMethod gMethods[] = {
    978   NATIVE_METHOD(UNIXProcess, destroyProcess, "(I)V"),
    979   NATIVE_METHOD(UNIXProcess, forkAndExec, "([B[BI[BI[B[IZ)I"),
    980   NATIVE_METHOD(UNIXProcess, waitForProcessExit, "(I)I"),
    981   NATIVE_METHOD(UNIXProcess, initIDs, "()V"),
    982 };
    983 
        /*
         * Registers every entry of gMethods as a native method of the class
         * "java/lang/UNIXProcess" by calling jniRegisterNativeMethods.
         *
         * NOTE(review): the result of jniRegisterNativeMethods is not checked
         * here -- confirm how that helper reports a failed registration.
         */
    984 void register_java_lang_UNIXProcess(JNIEnv* env) {
    985   jniRegisterNativeMethods(env, "java/lang/UNIXProcess", gMethods, NELEM(gMethods));
    986 }
    987