Home | History | Annotate | Download | only in suid
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // https://chromium.googlesource.com/chromium/src/+/master/docs/linux_suid_sandbox.md
      6 
      7 #include "sandbox/linux/suid/common/sandbox.h"
      8 
      9 #define _GNU_SOURCE
     10 #include <asm/unistd.h>
     11 #include <errno.h>
     12 #include <fcntl.h>
     13 #include <limits.h>
     14 #include <sched.h>
     15 #include <signal.h>
     16 #include <stdarg.h>
     17 #include <stdbool.h>
     18 #include <stddef.h>
     19 #include <stdint.h>
     20 #include <stdio.h>
     21 #include <stdlib.h>
     22 #include <string.h>
     23 #include <sys/prctl.h>
     24 #include <sys/resource.h>
     25 #include <sys/socket.h>
     26 #include <sys/stat.h>
     27 #include <sys/time.h>
     28 #include <sys/types.h>
     29 #include <sys/vfs.h>
     30 #include <sys/wait.h>
     31 #include <unistd.h>
     32 
     33 #include "sandbox/linux/suid/common/suid_unsafe_environment_variables.h"
     34 #include "sandbox/linux/suid/process_util.h"
     35 
     36 #if !defined(CLONE_NEWPID)
     37 #define CLONE_NEWPID 0x20000000
     38 #endif
     39 #if !defined(CLONE_NEWNET)
     40 #define CLONE_NEWNET 0x40000000
     41 #endif
     42 
     43 static bool DropRoot();
     44 
     45 #define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
     46 
     47 static void FatalError(const char* msg, ...)
     48     __attribute__((noreturn, format(printf, 1, 2)));
     49 
     50 static void FatalError(const char* msg, ...) {
     51   va_list ap;
     52   va_start(ap, msg);
     53 
     54   vfprintf(stderr, msg, ap);
     55   fprintf(stderr, ": %s\n", strerror(errno));
     56   fflush(stderr);
     57   va_end(ap);
     58   _exit(1);
     59 }
     60 
     61 static void ExitWithErrorSignalHandler(int signal) {
     62   const char msg[] = "\nThe setuid sandbox got signaled, exiting.\n";
     63   if (-1 == write(2, msg, sizeof(msg) - 1)) {
     64     // Do nothing.
     65   }
     66 
     67   _exit(1);
     68 }
     69 
     70 // We will chroot() to the helper's /proc/self directory. Anything there will
     71 // not exist anymore if we make sure to wait() for the helper.
     72 //
     73 // /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
     74 // even if the helper survives as a zombie.
     75 //
     76 // There is very little reason to use fdinfo/ instead of fd/ but we are
     77 // paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
     78 #define SAFE_DIR "/proc/self/fdinfo"
     79 #define SAFE_DIR2 "/proc/self/fd"
     80 
     81 static bool SpawnChrootHelper() {
     82   int sv[2];
     83   if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
     84     perror("socketpair");
     85     return false;
     86   }
     87 
     88   char* safedir = NULL;
     89   struct stat sdir_stat;
     90   if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
     91     safedir = SAFE_DIR;
     92   } else if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
     93     safedir = SAFE_DIR2;
     94   } else {
     95     fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
     96     return false;
     97   }
     98 
     99   const pid_t pid = syscall(__NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
    100 
    101   if (pid == -1) {
    102     perror("clone");
    103     close(sv[0]);
    104     close(sv[1]);
    105     return false;
    106   }
    107 
    108   if (pid == 0) {
    109     // We share our files structure with an untrusted process. As a security in
    110     // depth measure, we make sure that we can't open anything by mistake.
    111     // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
    112 
    113     const struct rlimit nofile = {0, 0};
    114     if (setrlimit(RLIMIT_NOFILE, &nofile))
    115       FatalError("Setting RLIMIT_NOFILE");
    116 
    117     if (close(sv[1]))
    118       FatalError("close");
    119 
    120     // wait for message
    121     char msg;
    122     ssize_t bytes;
    123     do {
    124       bytes = read(sv[0], &msg, 1);
    125     } while (bytes == -1 && errno == EINTR);
    126 
    127     if (bytes == 0)
    128       _exit(0);
    129     if (bytes != 1)
    130       FatalError("read");
    131 
    132     // do chrooting
    133     if (msg != kMsgChrootMe)
    134       FatalError("Unknown message from sandboxed process");
    135 
    136     // sanity check
    137     if (chdir(safedir))
    138       FatalError("Cannot chdir into /proc/ directory");
    139 
    140     if (chroot(safedir))
    141       FatalError("Cannot chroot into /proc/ directory");
    142 
    143     if (chdir("/"))
    144       FatalError("Cannot chdir to / after chroot");
    145 
    146     const char reply = kMsgChrootSuccessful;
    147     do {
    148       bytes = write(sv[0], &reply, 1);
    149     } while (bytes == -1 && errno == EINTR);
    150 
    151     if (bytes != 1)
    152       FatalError("Writing reply");
    153 
    154     _exit(0);
    155     // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
    156     // are chrooted there.
    157     // Our (unprivileged) parent should not even be able to open "." or "/"
    158     // since they would need to pass the ptrace() check. If our parent wait()
    159     // for us, our root directory will completely disappear.
    160   }
    161 
    162   if (close(sv[0])) {
    163     close(sv[1]);
    164     perror("close");
    165     return false;
    166   }
    167 
    168   // In the parent process, we install an environment variable containing the
    169   // number of the file descriptor.
    170   char desc_str[64];
    171   int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
    172   if (printed < 0 || printed >= (int)sizeof(desc_str)) {
    173     fprintf(stderr, "Failed to snprintf\n");
    174     return false;
    175   }
    176 
    177   if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
    178     perror("setenv");
    179     close(sv[1]);
    180     return false;
    181   }
    182 
    183   // We also install an environment variable containing the pid of the child
    184   char helper_pid_str[64];
    185   printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
    186   if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
    187     fprintf(stderr, "Failed to snprintf\n");
    188     return false;
    189   }
    190 
    191   if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
    192     perror("setenv");
    193     close(sv[1]);
    194     return false;
    195   }
    196 
    197   return true;
    198 }
    199 
    200 // Block until child_pid exits, then exit. Try to preserve the exit code.
    201 static void WaitForChildAndExit(pid_t child_pid) {
    202   int exit_code = -1;
    203   siginfo_t reaped_child_info;
    204 
    205   // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager
    206   // when things are hanging.
    207   // Here, the current process is going to waitid() and _exit(), so there is no
    208   // point in generating a crash report. The child process is the one
    209   // blocking us.
    210   if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) {
    211     FatalError("Failed to change signal handler");
    212   }
    213 
    214   int wait_ret =
    215       HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
    216 
    217   if (!wait_ret && reaped_child_info.si_pid == child_pid) {
    218     if (reaped_child_info.si_code == CLD_EXITED) {
    219       exit_code = reaped_child_info.si_status;
    220     } else {
    221       // Exit with code 0 if the child got signaled.
    222       exit_code = 0;
    223     }
    224   }
    225   _exit(exit_code);
    226 }
    227 
    228 static bool MoveToNewNamespaces() {
    229   // These are the sets of flags which we'll try, in order.
    230   const int kCloneExtraFlags[] = {CLONE_NEWPID | CLONE_NEWNET, CLONE_NEWPID, };
    231 
    232   // We need to close kZygoteIdFd before the child can continue. We use this
    233   // socketpair to tell the child when to continue;
    234   int sync_fds[2];
    235   if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
    236     FatalError("Failed to create a socketpair");
    237   }
    238 
    239   for (size_t i = 0; i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
    240        i++) {
    241     pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);
    242     const int clone_errno = errno;
    243 
    244     if (pid > 0) {
    245       if (!DropRoot()) {
    246         FatalError("Could not drop privileges");
    247       } else {
    248         if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
    249           FatalError("Could not close socketpair");
    250         // The kZygoteIdFd needs to be closed in the parent before
    251         // Zygote gets started.
    252         if (close(kZygoteIdFd))
    253           FatalError("close");
    254         // Tell our child to continue
    255         if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
    256           FatalError("send");
    257         if (close(sync_fds[1]))
    258           FatalError("close");
    259         // We want to keep a full process tree and we don't want our childs to
    260         // be reparented to (the outer PID namespace) init. So we wait for it.
    261         WaitForChildAndExit(pid);
    262       }
    263       // NOTREACHED
    264       FatalError("Not reached");
    265     }
    266 
    267     if (pid == 0) {
    268       if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
    269         FatalError("Could not close socketpair");
    270 
    271       // Wait for the parent to confirm it closed kZygoteIdFd before we
    272       // continue
    273       char should_continue;
    274       if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
    275         FatalError("Read on socketpair");
    276       if (close(sync_fds[0]))
    277         FatalError("close");
    278 
    279       if (kCloneExtraFlags[i] & CLONE_NEWPID) {
    280         setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
    281       } else {
    282         unsetenv(kSandboxPIDNSEnvironmentVarName);
    283       }
    284 
    285       if (kCloneExtraFlags[i] & CLONE_NEWNET) {
    286         setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
    287       } else {
    288         unsetenv(kSandboxNETNSEnvironmentVarName);
    289       }
    290 
    291       break;
    292     }
    293 
    294     // If EINVAL then the system doesn't support the requested flags, so
    295     // continue to try a different set.
    296     // On any other errno value the system *does* support these flags but
    297     // something went wrong, hence we bail with an error message rather then
    298     // provide less security.
    299     if (errno != EINVAL) {
    300       fprintf(stderr, "Failed to move to new namespace:");
    301       if (kCloneExtraFlags[i] & CLONE_NEWPID) {
    302         fprintf(stderr, " PID namespaces supported,");
    303       }
    304       if (kCloneExtraFlags[i] & CLONE_NEWNET) {
    305         fprintf(stderr, " Network namespace supported,");
    306       }
    307       fprintf(stderr, " but failed: errno = %s\n", strerror(clone_errno));
    308       return false;
    309     }
    310   }
    311 
    312   // If the system doesn't support NEWPID then we carry on anyway.
    313   return true;
    314 }
    315 
    316 static bool DropRoot() {
    317   if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) {
    318     perror("prctl(PR_SET_DUMPABLE)");
    319     return false;
    320   }
    321 
    322   if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) {
    323     perror("Still dumpable after prctl(PR_SET_DUMPABLE)");
    324     return false;
    325   }
    326 
    327   gid_t rgid, egid, sgid;
    328   if (getresgid(&rgid, &egid, &sgid)) {
    329     perror("getresgid");
    330     return false;
    331   }
    332 
    333   if (setresgid(rgid, rgid, rgid)) {
    334     perror("setresgid");
    335     return false;
    336   }
    337 
    338   uid_t ruid, euid, suid;
    339   if (getresuid(&ruid, &euid, &suid)) {
    340     perror("getresuid");
    341     return false;
    342   }
    343 
    344   if (setresuid(ruid, ruid, ruid)) {
    345     perror("setresuid");
    346     return false;
    347   }
    348 
    349   return true;
    350 }
    351 
    352 static bool SetupChildEnvironment() {
    353   unsigned i;
    354 
    355   // ld.so may have cleared several environment variables because we are SUID.
    356   // However, the child process might need them so zygote_host_linux.cc saves a
    357   // copy in SANDBOX_$x. This is safe because we have dropped root by this
    358   // point, so we can only exec a binary with the permissions of the user who
    359   // ran us in the first place.
    360 
    361   for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
    362     const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
    363     char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
    364     if (!saved_envvar)
    365       return false;
    366 
    367     const char* const value = getenv(saved_envvar);
    368     if (value) {
    369       setenv(envvar, value, 1 /* overwrite */);
    370       unsetenv(saved_envvar);
    371     }
    372 
    373     free(saved_envvar);
    374   }
    375 
    376   return true;
    377 }
    378 
    379 bool CheckAndExportApiVersion() {
    380   // Check the environment to see if a specific API version was requested.
    381   // assume version 0 if none.
    382   int api_number = -1;
    383   char* api_string = getenv(kSandboxEnvironmentApiRequest);
    384   if (!api_string) {
    385     api_number = 0;
    386   } else {
    387     errno = 0;
    388     char* endptr = NULL;
    389     long long_api_number = strtol(api_string, &endptr, 10);
    390     if (!endptr || *endptr || errno != 0 || long_api_number < INT_MIN ||
    391         long_api_number > INT_MAX) {
    392       return false;
    393     }
    394     api_number = long_api_number;
    395   }
    396 
    397   // Warn only for now.
    398   if (api_number != kSUIDSandboxApiNumber) {
    399     fprintf(
    400         stderr,
    401         "The setuid sandbox provides API version %d, "
    402         "but you need %d\n"
    403         "Please read "
    404         "https://chromium.googlesource.com/chromium/src/+/master/docs/linux_suid_sandbox_development.md."
    405         "\n\n",
    406         kSUIDSandboxApiNumber,
    407         api_number);
    408   }
    409 
    410   // Export our version so that the sandboxed process can verify it did not
    411   // use an old sandbox.
    412   char version_string[64];
    413   snprintf(version_string, sizeof(version_string), "%d", kSUIDSandboxApiNumber);
    414   if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
    415     perror("setenv");
    416     return false;
    417   }
    418 
    419   return true;
    420 }
    421 
    422 int main(int argc, char** argv) {
    423   if (argc <= 1) {
    424     if (argc <= 0) {
    425       return 1;
    426     }
    427 
    428     fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
    429     return 1;
    430   }
    431 
    432   // Allow someone to query our API version
    433   if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
    434     printf("%d\n", kSUIDSandboxApiNumber);
    435     return 0;
    436   }
    437 
    438   // We cannot adjust /proc/pid/oom_adj for sandboxed renderers
    439   // because those files are owned by root. So we need a helper here.
    440   if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
    441     char* endptr = NULL;
    442     long score;
    443     errno = 0;
    444     unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
    445     if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
    446       return 1;
    447     pid_t pid = pid_ul;
    448     endptr = NULL;
    449     errno = 0;
    450     score = strtol(argv[3], &endptr, 10);
    451     if (score == LONG_MAX || score == LONG_MIN || !endptr || *endptr ||
    452         errno != 0) {
    453       return 1;
    454     }
    455     return AdjustOOMScore(pid, score);
    456   }
    457 
    458   // Protect the core setuid sandbox functionality with an API version
    459   if (!CheckAndExportApiVersion()) {
    460     return 1;
    461   }
    462 
    463   if (geteuid() != 0) {
    464     fprintf(stderr,
    465             "The setuid sandbox is not running as root. Common causes:\n"
    466             "  * An unprivileged process using ptrace on it, like a debugger.\n"
    467             "  * A parent process set prctl(PR_SET_NO_NEW_PRIVS, ...)\n");
    468   }
    469 
    470   if (!MoveToNewNamespaces())
    471     return 1;
    472   if (!SpawnChrootHelper())
    473     return 1;
    474   if (!DropRoot())
    475     return 1;
    476   if (!SetupChildEnvironment())
    477     return 1;
    478 
    479   execv(argv[1], &argv[1]);
    480   FatalError("execv failed");
    481 
    482   return 1;
    483 }
    484