Home | History | Annotate | Download | only in syscall
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build darwin dragonfly freebsd linux netbsd openbsd solaris
      6 
      7 // Fork, exec, wait, etc.
      8 
      9 package syscall
     10 
     11 import (
     12 	"runtime"
     13 	"sync"
     14 	"unsafe"
     15 )
     16 
// ForkLock synchronizes creation of new file descriptors with fork.
//
// We want the child in a fork/exec sequence to inherit only the
// file descriptors we intend. To do that, we mark all file
// descriptors close-on-exec and then, in the child, explicitly
// unmark the ones we want the exec'ed program to keep.
// Unix doesn't make this easy: there is, in general, no way to
// allocate a new file descriptor close-on-exec. Instead you
// have to allocate the descriptor and then mark it close-on-exec.
// If a fork happens between those two events, the child's exec
// will inherit an unwanted file descriptor.
//
// This lock solves that race: the create new fd/mark close-on-exec
// operation is done holding ForkLock for reading, and the fork itself
// is done holding ForkLock for writing. At least, that's the idea.
// There are some complications.
//
// Some system calls that create new file descriptors can block
// for arbitrarily long times: open on a hung NFS server or named
// pipe, accept on a socket, and so on. We can't reasonably grab
// the lock across those operations.
//
// It is worse to inherit some file descriptors than others.
// If a non-malicious child accidentally inherits an open ordinary file,
// that's not a big deal. On the other hand, if a long-lived child
// accidentally inherits the write end of a pipe, then the reader
// of that pipe will not see EOF until that child exits, potentially
// causing the parent program to hang. This is a common problem
// in threaded C programs that use popen.
//
// Luckily, the file descriptors that are most important not to
// inherit are not the ones that can take an arbitrarily long time
// to create: pipe returns instantly, and the net package uses
// non-blocking I/O to accept on a listening socket.
// The rules for which file descriptor-creating operations use the
// ForkLock are as follows:
//
// 1) Pipe. Does not block. Use the ForkLock.
// 2) Socket. Does not block. Use the ForkLock.
// 3) Accept. If using non-blocking mode, use the ForkLock.
//             Otherwise, live with the race.
// 4) Open. Can block. Use O_CLOEXEC if available (Linux).
//             Otherwise, live with the race.
// 5) Dup. Does not block. Use the ForkLock.
//             On Linux, could use fcntl F_DUPFD_CLOEXEC
//             instead of the ForkLock, but only for dup(fd, -1).
var ForkLock sync.RWMutex
     65 
     66 // StringSlicePtr converts a slice of strings to a slice of pointers
     67 // to NUL-terminated byte arrays. If any string contains a NUL byte
     68 // this function panics instead of returning an error.
     69 //
     70 // Deprecated: Use SlicePtrFromStrings instead.
     71 func StringSlicePtr(ss []string) []*byte {
     72 	bb := make([]*byte, len(ss)+1)
     73 	for i := 0; i < len(ss); i++ {
     74 		bb[i] = StringBytePtr(ss[i])
     75 	}
     76 	bb[len(ss)] = nil
     77 	return bb
     78 }
     79 
     80 // SlicePtrFromStrings converts a slice of strings to a slice of
     81 // pointers to NUL-terminated byte arrays. If any string contains
     82 // a NUL byte, it returns (nil, EINVAL).
     83 func SlicePtrFromStrings(ss []string) ([]*byte, error) {
     84 	var err error
     85 	bb := make([]*byte, len(ss)+1)
     86 	for i := 0; i < len(ss); i++ {
     87 		bb[i], err = BytePtrFromString(ss[i])
     88 		if err != nil {
     89 			return nil, err
     90 		}
     91 	}
     92 	bb[len(ss)] = nil
     93 	return bb, nil
     94 }
     95 
     96 func CloseOnExec(fd int) { fcntl(fd, F_SETFD, FD_CLOEXEC) }
     97 
     98 func SetNonblock(fd int, nonblocking bool) (err error) {
     99 	flag, err := fcntl(fd, F_GETFL, 0)
    100 	if err != nil {
    101 		return err
    102 	}
    103 	if nonblocking {
    104 		flag |= O_NONBLOCK
    105 	} else {
    106 		flag &^= O_NONBLOCK
    107 	}
    108 	_, err = fcntl(fd, F_SETFL, flag)
    109 	return err
    110 }
    111 
// Credential holds user and group identities to be assumed
// by a child process started by StartProcess.
//
// NOTE(review): nothing in this file reads these fields directly;
// presumably they reach the child through SysProcAttr — confirm there.
type Credential struct {
	Uid         uint32   // User ID.
	Gid         uint32   // Group ID.
	Groups      []uint32 // Supplementary group IDs.
	NoSetGroups bool     // If true, don't set supplementary groups.
}
    120 
// ProcAttr holds attributes that will be applied to a new process started
// by StartProcess.
//
// When Dir is empty, forkExec passes no working directory on to the child
// setup. When Sys is nil, forkExec uses a zero SysProcAttr in its place.
type ProcAttr struct {
	Dir   string    // Current working directory.
	Env   []string  // Environment.
	Files []uintptr // File descriptors.
	Sys   *SysProcAttr
}
    129 
    130 var zeroProcAttr ProcAttr
    131 var zeroSysProcAttr SysProcAttr
    132 
    133 func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
    134 	var p [2]int
    135 	var n int
    136 	var err1 Errno
    137 	var wstatus WaitStatus
    138 
    139 	if attr == nil {
    140 		attr = &zeroProcAttr
    141 	}
    142 	sys := attr.Sys
    143 	if sys == nil {
    144 		sys = &zeroSysProcAttr
    145 	}
    146 
    147 	p[0] = -1
    148 	p[1] = -1
    149 
    150 	// Convert args to C form.
    151 	argv0p, err := BytePtrFromString(argv0)
    152 	if err != nil {
    153 		return 0, err
    154 	}
    155 	argvp, err := SlicePtrFromStrings(argv)
    156 	if err != nil {
    157 		return 0, err
    158 	}
    159 	envvp, err := SlicePtrFromStrings(attr.Env)
    160 	if err != nil {
    161 		return 0, err
    162 	}
    163 
    164 	if (runtime.GOOS == "freebsd" || runtime.GOOS == "dragonfly") && len(argv[0]) > len(argv0) {
    165 		argvp[0] = argv0p
    166 	}
    167 
    168 	var chroot *byte
    169 	if sys.Chroot != "" {
    170 		chroot, err = BytePtrFromString(sys.Chroot)
    171 		if err != nil {
    172 			return 0, err
    173 		}
    174 	}
    175 	var dir *byte
    176 	if attr.Dir != "" {
    177 		dir, err = BytePtrFromString(attr.Dir)
    178 		if err != nil {
    179 			return 0, err
    180 		}
    181 	}
    182 
    183 	// Acquire the fork lock so that no other threads
    184 	// create new fds that are not yet close-on-exec
    185 	// before we fork.
    186 	ForkLock.Lock()
    187 
    188 	// Allocate child status pipe close on exec.
    189 	if err = forkExecPipe(p[:]); err != nil {
    190 		goto error
    191 	}
    192 
    193 	// Kick off child.
    194 	pid, err1 = forkAndExecInChild(argv0p, argvp, envvp, chroot, dir, attr, sys, p[1])
    195 	if err1 != 0 {
    196 		err = Errno(err1)
    197 		goto error
    198 	}
    199 	ForkLock.Unlock()
    200 
    201 	// Read child error status from pipe.
    202 	Close(p[1])
    203 	n, err = readlen(p[0], (*byte)(unsafe.Pointer(&err1)), int(unsafe.Sizeof(err1)))
    204 	Close(p[0])
    205 	if err != nil || n != 0 {
    206 		if n == int(unsafe.Sizeof(err1)) {
    207 			err = Errno(err1)
    208 		}
    209 		if err == nil {
    210 			err = EPIPE
    211 		}
    212 
    213 		// Child failed; wait for it to exit, to make sure
    214 		// the zombies don't accumulate.
    215 		_, err1 := Wait4(pid, &wstatus, 0, nil)
    216 		for err1 == EINTR {
    217 			_, err1 = Wait4(pid, &wstatus, 0, nil)
    218 		}
    219 		return 0, err
    220 	}
    221 
    222 	// Read got EOF, so pipe closed on exec, so exec succeeded.
    223 	return pid, nil
    224 
    225 error:
    226 	if p[0] >= 0 {
    227 		Close(p[0])
    228 		Close(p[1])
    229 	}
    230 	ForkLock.Unlock()
    231 	return 0, err
    232 }
    233 
    234 // Combination of fork and exec, careful to be thread safe.
    235 func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
    236 	return forkExec(argv0, argv, attr)
    237 }
    238 
    239 // StartProcess wraps ForkExec for package os.
    240 func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
    241 	pid, err = forkExec(argv0, argv, attr)
    242 	return pid, 0, err
    243 }
    244 
// runtime_BeforeExec and runtime_AfterExec are implemented in the runtime
// package. Exec calls runtime_BeforeExec immediately before issuing execve
// and runtime_AfterExec once that call has returned.
func runtime_BeforeExec()
func runtime_AfterExec()
    248 
// execveSolaris is non-nil on Solaris, set to execve in exec_solaris.go; this
// avoids a build dependency for other platforms. Exec calls it in place of
// RawSyscall when runtime.GOOS is "solaris".
var execveSolaris func(path uintptr, argv uintptr, envp uintptr) (err Errno)
    252 
    253 // Exec invokes the execve(2) system call.
    254 func Exec(argv0 string, argv []string, envv []string) (err error) {
    255 	argv0p, err := BytePtrFromString(argv0)
    256 	if err != nil {
    257 		return err
    258 	}
    259 	argvp, err := SlicePtrFromStrings(argv)
    260 	if err != nil {
    261 		return err
    262 	}
    263 	envvp, err := SlicePtrFromStrings(envv)
    264 	if err != nil {
    265 		return err
    266 	}
    267 	runtime_BeforeExec()
    268 
    269 	var err1 Errno
    270 	if runtime.GOOS == "solaris" {
    271 		// RawSyscall should never be used on Solaris.
    272 		err1 = execveSolaris(
    273 			uintptr(unsafe.Pointer(argv0p)),
    274 			uintptr(unsafe.Pointer(&argvp[0])),
    275 			uintptr(unsafe.Pointer(&envvp[0])))
    276 	} else {
    277 		_, _, err1 = RawSyscall(SYS_EXECVE,
    278 			uintptr(unsafe.Pointer(argv0p)),
    279 			uintptr(unsafe.Pointer(&argvp[0])),
    280 			uintptr(unsafe.Pointer(&envvp[0])))
    281 	}
    282 	runtime_AfterExec()
    283 	return err1
    284 }
    285