Home | History | Annotate | Download | only in suid
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox
      6 
      7 #include "sandbox/linux/suid/common/sandbox.h"
      8 
      9 #define _GNU_SOURCE
     10 #include <asm/unistd.h>
     11 #include <errno.h>
     12 #include <fcntl.h>
     13 #include <limits.h>
     14 #include <sched.h>
     15 #include <signal.h>
     16 #include <stdarg.h>
     17 #include <stdbool.h>
     18 #include <stdint.h>
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include <sys/prctl.h>
     23 #include <sys/resource.h>
     24 #include <sys/socket.h>
     25 #include <sys/stat.h>
     26 #include <sys/time.h>
     27 #include <sys/types.h>
     28 #include <sys/vfs.h>
     29 #include <sys/wait.h>
     30 #include <unistd.h>
     31 
     32 #include "sandbox/linux/suid/common/suid_unsafe_environment_variables.h"
     33 #include "sandbox/linux/suid/process_util.h"
     34 
     35 #if !defined(CLONE_NEWPID)
     36 #define CLONE_NEWPID 0x20000000
     37 #endif
     38 #if !defined(CLONE_NEWNET)
     39 #define CLONE_NEWNET 0x40000000
     40 #endif
     41 
     42 static bool DropRoot();
     43 
     44 #define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
     45 
     46 static void FatalError(const char* msg, ...)
     47     __attribute__((noreturn, format(printf, 1, 2)));
     48 
     49 static void FatalError(const char* msg, ...) {
     50   va_list ap;
     51   va_start(ap, msg);
     52 
     53   vfprintf(stderr, msg, ap);
     54   fprintf(stderr, ": %s\n", strerror(errno));
     55   fflush(stderr);
     56   va_end(ap);
     57   _exit(1);
     58 }
     59 
     60 static void ExitWithErrorSignalHandler(int signal) {
     61   const char msg[] = "\nThe setuid sandbox got signaled, exiting.\n";
     62   if (-1 == write(2, msg, sizeof(msg) - 1)) {
     63     // Do nothing.
     64   }
     65 
     66   _exit(1);
     67 }
     68 
     69 // We will chroot() to the helper's /proc/self directory. Anything there will
     70 // not exist anymore if we make sure to wait() for the helper.
     71 //
     72 // /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
     73 // even if the helper survives as a zombie.
     74 //
     75 // There is very little reason to use fdinfo/ instead of fd/ but we are
     76 // paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
     77 #define SAFE_DIR "/proc/self/fdinfo"
     78 #define SAFE_DIR2 "/proc/self/fd"
     79 
     80 static bool SpawnChrootHelper() {
     81   int sv[2];
     82   if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
     83     perror("socketpair");
     84     return false;
     85   }
     86 
     87   char* safedir = NULL;
     88   struct stat sdir_stat;
     89   if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
     90     safedir = SAFE_DIR;
     91   } else if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) {
     92     safedir = SAFE_DIR2;
     93   } else {
     94     fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
     95     return false;
     96   }
     97 
     98   const pid_t pid = syscall(__NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
     99 
    100   if (pid == -1) {
    101     perror("clone");
    102     close(sv[0]);
    103     close(sv[1]);
    104     return false;
    105   }
    106 
    107   if (pid == 0) {
    108     // We share our files structure with an untrusted process. As a security in
    109     // depth measure, we make sure that we can't open anything by mistake.
    110     // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
    111 
    112     const struct rlimit nofile = {0, 0};
    113     if (setrlimit(RLIMIT_NOFILE, &nofile))
    114       FatalError("Setting RLIMIT_NOFILE");
    115 
    116     if (close(sv[1]))
    117       FatalError("close");
    118 
    119     // wait for message
    120     char msg;
    121     ssize_t bytes;
    122     do {
    123       bytes = read(sv[0], &msg, 1);
    124     } while (bytes == -1 && errno == EINTR);
    125 
    126     if (bytes == 0)
    127       _exit(0);
    128     if (bytes != 1)
    129       FatalError("read");
    130 
    131     // do chrooting
    132     if (msg != kMsgChrootMe)
    133       FatalError("Unknown message from sandboxed process");
    134 
    135     // sanity check
    136     if (chdir(safedir))
    137       FatalError("Cannot chdir into /proc/ directory");
    138 
    139     if (chroot(safedir))
    140       FatalError("Cannot chroot into /proc/ directory");
    141 
    142     if (chdir("/"))
    143       FatalError("Cannot chdir to / after chroot");
    144 
    145     const char reply = kMsgChrootSuccessful;
    146     do {
    147       bytes = write(sv[0], &reply, 1);
    148     } while (bytes == -1 && errno == EINTR);
    149 
    150     if (bytes != 1)
    151       FatalError("Writing reply");
    152 
    153     _exit(0);
    154     // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
    155     // are chrooted there.
    156     // Our (unprivileged) parent should not even be able to open "." or "/"
    157     // since they would need to pass the ptrace() check. If our parent wait()
    158     // for us, our root directory will completely disappear.
    159   }
    160 
    161   if (close(sv[0])) {
    162     close(sv[1]);
    163     perror("close");
    164     return false;
    165   }
    166 
    167   // In the parent process, we install an environment variable containing the
    168   // number of the file descriptor.
    169   char desc_str[64];
    170   int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
    171   if (printed < 0 || printed >= (int)sizeof(desc_str)) {
    172     fprintf(stderr, "Failed to snprintf\n");
    173     return false;
    174   }
    175 
    176   if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
    177     perror("setenv");
    178     close(sv[1]);
    179     return false;
    180   }
    181 
    182   // We also install an environment variable containing the pid of the child
    183   char helper_pid_str[64];
    184   printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
    185   if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
    186     fprintf(stderr, "Failed to snprintf\n");
    187     return false;
    188   }
    189 
    190   if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
    191     perror("setenv");
    192     close(sv[1]);
    193     return false;
    194   }
    195 
    196   return true;
    197 }
    198 
    199 // Block until child_pid exits, then exit. Try to preserve the exit code.
    200 static void WaitForChildAndExit(pid_t child_pid) {
    201   int exit_code = -1;
    202   siginfo_t reaped_child_info;
    203 
    204   // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager
    205   // when things are hanging.
    206   // Here, the current process is going to waitid() and _exit(), so there is no
    207   // point in generating a crash report. The child process is the one
    208   // blocking us.
    209   if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) {
    210     FatalError("Failed to change signal handler");
    211   }
    212 
    213   int wait_ret =
    214       HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
    215 
    216   if (!wait_ret && reaped_child_info.si_pid == child_pid) {
    217     if (reaped_child_info.si_code == CLD_EXITED) {
    218       exit_code = reaped_child_info.si_status;
    219     } else {
    220       // Exit with code 0 if the child got signaled.
    221       exit_code = 0;
    222     }
    223   }
    224   _exit(exit_code);
    225 }
    226 
    227 static bool MoveToNewNamespaces() {
    228   // These are the sets of flags which we'll try, in order.
    229   const int kCloneExtraFlags[] = {CLONE_NEWPID | CLONE_NEWNET, CLONE_NEWPID, };
    230 
    231   // We need to close kZygoteIdFd before the child can continue. We use this
    232   // socketpair to tell the child when to continue;
    233   int sync_fds[2];
    234   if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
    235     FatalError("Failed to create a socketpair");
    236   }
    237 
    238   for (size_t i = 0; i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
    239        i++) {
    240     pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);
    241     const int clone_errno = errno;
    242 
    243     if (pid > 0) {
    244       if (!DropRoot()) {
    245         FatalError("Could not drop privileges");
    246       } else {
    247         if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
    248           FatalError("Could not close socketpair");
    249         // The kZygoteIdFd needs to be closed in the parent before
    250         // Zygote gets started.
    251         if (close(kZygoteIdFd))
    252           FatalError("close");
    253         // Tell our child to continue
    254         if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
    255           FatalError("send");
    256         if (close(sync_fds[1]))
    257           FatalError("close");
    258         // We want to keep a full process tree and we don't want our childs to
    259         // be reparented to (the outer PID namespace) init. So we wait for it.
    260         WaitForChildAndExit(pid);
    261       }
    262       // NOTREACHED
    263       FatalError("Not reached");
    264     }
    265 
    266     if (pid == 0) {
    267       if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
    268         FatalError("Could not close socketpair");
    269 
    270       // Wait for the parent to confirm it closed kZygoteIdFd before we
    271       // continue
    272       char should_continue;
    273       if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
    274         FatalError("Read on socketpair");
    275       if (close(sync_fds[0]))
    276         FatalError("close");
    277 
    278       if (kCloneExtraFlags[i] & CLONE_NEWPID) {
    279         setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
    280       } else {
    281         unsetenv(kSandboxPIDNSEnvironmentVarName);
    282       }
    283 
    284       if (kCloneExtraFlags[i] & CLONE_NEWNET) {
    285         setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
    286       } else {
    287         unsetenv(kSandboxNETNSEnvironmentVarName);
    288       }
    289 
    290       break;
    291     }
    292 
    293     // If EINVAL then the system doesn't support the requested flags, so
    294     // continue to try a different set.
    295     // On any other errno value the system *does* support these flags but
    296     // something went wrong, hence we bail with an error message rather then
    297     // provide less security.
    298     if (errno != EINVAL) {
    299       fprintf(stderr, "Failed to move to new namespace:");
    300       if (kCloneExtraFlags[i] & CLONE_NEWPID) {
    301         fprintf(stderr, " PID namespaces supported,");
    302       }
    303       if (kCloneExtraFlags[i] & CLONE_NEWNET) {
    304         fprintf(stderr, " Network namespace supported,");
    305       }
    306       fprintf(stderr, " but failed: errno = %s\n", strerror(clone_errno));
    307       return false;
    308     }
    309   }
    310 
    311   // If the system doesn't support NEWPID then we carry on anyway.
    312   return true;
    313 }
    314 
    315 static bool DropRoot() {
    316   if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) {
    317     perror("prctl(PR_SET_DUMPABLE)");
    318     return false;
    319   }
    320 
    321   if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) {
    322     perror("Still dumpable after prctl(PR_SET_DUMPABLE)");
    323     return false;
    324   }
    325 
    326   gid_t rgid, egid, sgid;
    327   if (getresgid(&rgid, &egid, &sgid)) {
    328     perror("getresgid");
    329     return false;
    330   }
    331 
    332   if (setresgid(rgid, rgid, rgid)) {
    333     perror("setresgid");
    334     return false;
    335   }
    336 
    337   uid_t ruid, euid, suid;
    338   if (getresuid(&ruid, &euid, &suid)) {
    339     perror("getresuid");
    340     return false;
    341   }
    342 
    343   if (setresuid(ruid, ruid, ruid)) {
    344     perror("setresuid");
    345     return false;
    346   }
    347 
    348   return true;
    349 }
    350 
    351 static bool SetupChildEnvironment() {
    352   unsigned i;
    353 
    354   // ld.so may have cleared several environment variables because we are SUID.
    355   // However, the child process might need them so zygote_host_linux.cc saves a
    356   // copy in SANDBOX_$x. This is safe because we have dropped root by this
    357   // point, so we can only exec a binary with the permissions of the user who
    358   // ran us in the first place.
    359 
    360   for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
    361     const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
    362     char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
    363     if (!saved_envvar)
    364       return false;
    365 
    366     const char* const value = getenv(saved_envvar);
    367     if (value) {
    368       setenv(envvar, value, 1 /* overwrite */);
    369       unsetenv(saved_envvar);
    370     }
    371 
    372     free(saved_envvar);
    373   }
    374 
    375   return true;
    376 }
    377 
    378 bool CheckAndExportApiVersion() {
    379   // Check the environment to see if a specific API version was requested.
    380   // assume version 0 if none.
    381   long api_number = -1;
    382   char* api_string = getenv(kSandboxEnvironmentApiRequest);
    383   if (!api_string) {
    384     api_number = 0;
    385   } else {
    386     errno = 0;
    387     char* endptr = NULL;
    388     api_number = strtol(api_string, &endptr, 10);
    389     if (!endptr || *endptr || errno != 0)
    390       return false;
    391   }
    392 
    393   // Warn only for now.
    394   if (api_number != kSUIDSandboxApiNumber) {
    395     fprintf(
    396         stderr,
    397         "The setuid sandbox provides API version %ld, "
    398         "but you need %ld\n"
    399         "Please read "
    400         "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment."
    401         "\n\n",
    402         kSUIDSandboxApiNumber,
    403         api_number);
    404   }
    405 
    406   // Export our version so that the sandboxed process can verify it did not
    407   // use an old sandbox.
    408   char version_string[64];
    409   snprintf(
    410       version_string, sizeof(version_string), "%ld", kSUIDSandboxApiNumber);
    411   if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
    412     perror("setenv");
    413     return false;
    414   }
    415 
    416   return true;
    417 }
    418 
    419 int main(int argc, char** argv) {
    420   if (argc <= 1) {
    421     if (argc <= 0) {
    422       return 1;
    423     }
    424 
    425     fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
    426     return 1;
    427   }
    428 
    429   // Allow someone to query our API version
    430   if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
    431     printf("%ld\n", kSUIDSandboxApiNumber);
    432     return 0;
    433   }
    434 
    435   // We cannot adjust /proc/pid/oom_adj for sandboxed renderers
    436   // because those files are owned by root. So we need a helper here.
    437   if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
    438     char* endptr = NULL;
    439     long score;
    440     errno = 0;
    441     unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
    442     if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
    443       return 1;
    444     pid_t pid = pid_ul;
    445     endptr = NULL;
    446     errno = 0;
    447     score = strtol(argv[3], &endptr, 10);
    448     if (score == LONG_MAX || score == LONG_MIN || !endptr || *endptr ||
    449         errno != 0) {
    450       return 1;
    451     }
    452     return AdjustOOMScore(pid, score);
    453   }
    454 
    455   // Protect the core setuid sandbox functionality with an API version
    456   if (!CheckAndExportApiVersion()) {
    457     return 1;
    458   }
    459 
    460   if (geteuid() != 0) {
    461     fprintf(stderr,
    462             "The setuid sandbox is not running as root. Common causes:\n"
    463             "  * An unprivileged process using ptrace on it, like a debugger.\n"
    464             "  * A parent process set prctl(PR_SET_NO_NEW_PRIVS, ...)\n");
    465   }
    466 
    467   if (!MoveToNewNamespaces())
    468     return 1;
    469   if (!SpawnChrootHelper())
    470     return 1;
    471   if (!DropRoot())
    472     return 1;
    473   if (!SetupChildEnvironment())
    474     return 1;
    475 
    476   execv(argv[1], &argv[1]);
    477   FatalError("execv failed");
    478 
    479   return 1;
    480 }
    481