Home | History | Annotate | Download | only in suid
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox
      6 
      7 #include "common/sandbox.h"
      8 
      9 #define _GNU_SOURCE
     10 #include <asm/unistd.h>
     11 #include <errno.h>
     12 #include <fcntl.h>
     13 #include <limits.h>
     14 #include <sched.h>
     15 #include <signal.h>
     16 #include <stdarg.h>
     17 #include <stdbool.h>
     18 #include <stdint.h>
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include <sys/prctl.h>
     23 #include <sys/resource.h>
     24 #include <sys/socket.h>
     25 #include <sys/stat.h>
     26 #include <sys/time.h>
     27 #include <sys/types.h>
     28 #include <sys/vfs.h>
     29 #include <sys/wait.h>
     30 #include <unistd.h>
     31 
     32 #include "linux_util.h"
     33 #include "process_util.h"
     34 #include "common/suid_unsafe_environment_variables.h"
     35 
     36 #if !defined(CLONE_NEWPID)
     37 #define CLONE_NEWPID 0x20000000
     38 #endif
     39 #if !defined(CLONE_NEWNET)
     40 #define CLONE_NEWNET 0x40000000
     41 #endif
     42 
     43 static bool DropRoot();
     44 
     45 #define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
     46 
     47 static void FatalError(const char *msg, ...)
     48     __attribute__((noreturn, format(printf, 1, 2)));
     49 
     50 static void FatalError(const char *msg, ...) {
     51   va_list ap;
     52   va_start(ap, msg);
     53 
     54   vfprintf(stderr, msg, ap);
     55   fprintf(stderr, ": %s\n", strerror(errno));
     56   fflush(stderr);
     57   va_end(ap);
     58   _exit(1);
     59 }
     60 
     61 // We will chroot() to the helper's /proc/self directory. Anything there will
     62 // not exist anymore if we make sure to wait() for the helper.
     63 //
     64 // /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
     65 // even if the helper survives as a zombie.
     66 //
     67 // There is very little reason to use fdinfo/ instead of fd/ but we are
     68 // paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
     69 #define SAFE_DIR "/proc/self/fdinfo"
     70 #define SAFE_DIR2 "/proc/self/fd"
     71 
     72 static bool SpawnChrootHelper() {
     73   int sv[2];
     74   if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
     75     perror("socketpair");
     76     return false;
     77   }
     78 
     79   char *safedir = NULL;
     80   struct stat sdir_stat;
     81   if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
     82     safedir = SAFE_DIR;
     83   else
     84     if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
     85       safedir = SAFE_DIR2;
     86     else {
     87       fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
     88       return false;
     89     }
     90 
     91   const pid_t pid = syscall(
     92       __NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
     93 
     94   if (pid == -1) {
     95     perror("clone");
     96     close(sv[0]);
     97     close(sv[1]);
     98     return false;
     99   }
    100 
    101   if (pid == 0) {
    102     // We share our files structure with an untrusted process. As a security in
    103     // depth measure, we make sure that we can't open anything by mistake.
    104     // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
    105 
    106     const struct rlimit nofile = {0, 0};
    107     if (setrlimit(RLIMIT_NOFILE, &nofile))
    108       FatalError("Setting RLIMIT_NOFILE");
    109 
    110     if (close(sv[1]))
    111       FatalError("close");
    112 
    113     // wait for message
    114     char msg;
    115     ssize_t bytes;
    116     do {
    117       bytes = read(sv[0], &msg, 1);
    118     } while (bytes == -1 && errno == EINTR);
    119 
    120     if (bytes == 0)
    121       _exit(0);
    122     if (bytes != 1)
    123       FatalError("read");
    124 
    125     // do chrooting
    126     if (msg != kMsgChrootMe)
    127       FatalError("Unknown message from sandboxed process");
    128 
    129     // sanity check
    130     if (chdir(safedir))
    131       FatalError("Cannot chdir into /proc/ directory");
    132 
    133     if (chroot(safedir))
    134       FatalError("Cannot chroot into /proc/ directory");
    135 
    136     if (chdir("/"))
    137       FatalError("Cannot chdir to / after chroot");
    138 
    139     const char reply = kMsgChrootSuccessful;
    140     do {
    141       bytes = write(sv[0], &reply, 1);
    142     } while (bytes == -1 && errno == EINTR);
    143 
    144     if (bytes != 1)
    145       FatalError("Writing reply");
    146 
    147     _exit(0);
    148     // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
    149     // are chrooted there.
    150     // Our (unprivileged) parent should not even be able to open "." or "/"
    151     // since they would need to pass the ptrace() check. If our parent wait()
    152     // for us, our root directory will completely disappear.
    153   }
    154 
    155   if (close(sv[0])) {
    156     close(sv[1]);
    157     perror("close");
    158     return false;
    159   }
    160 
    161   // In the parent process, we install an environment variable containing the
    162   // number of the file descriptor.
    163   char desc_str[64];
    164   int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
    165   if (printed < 0 || printed >= (int)sizeof(desc_str)) {
    166     fprintf(stderr, "Failed to snprintf\n");
    167     return false;
    168   }
    169 
    170   if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
    171     perror("setenv");
    172     close(sv[1]);
    173     return false;
    174   }
    175 
    176   // We also install an environment variable containing the pid of the child
    177   char helper_pid_str[64];
    178   printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
    179   if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
    180     fprintf(stderr, "Failed to snprintf\n");
    181     return false;
    182   }
    183 
    184   if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
    185     perror("setenv");
    186     close(sv[1]);
    187     return false;
    188   }
    189 
    190   return true;
    191 }
    192 
    193 // Block until child_pid exits, then exit. Try to preserve the exit code.
    194 static void WaitForChildAndExit(pid_t child_pid) {
    195   int exit_code = -1;
    196   siginfo_t reaped_child_info;
    197 
    198   int wait_ret =
    199     HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
    200 
    201   if (!wait_ret && reaped_child_info.si_pid == child_pid) {
    202     if (reaped_child_info.si_code == CLD_EXITED) {
    203       exit_code = reaped_child_info.si_status;
    204     } else {
    205       // Exit with code 0 if the child got signaled.
    206       exit_code = 0;
    207     }
    208   }
    209   _exit(exit_code);
    210 }
    211 
    212 static bool MoveToNewNamespaces() {
    213   // These are the sets of flags which we'll try, in order.
    214   const int kCloneExtraFlags[] = {
    215     CLONE_NEWPID | CLONE_NEWNET,
    216     CLONE_NEWPID,
    217   };
    218 
    219   // We need to close kZygoteIdFd before the child can continue. We use this
    220   // socketpair to tell the child when to continue;
    221   int sync_fds[2];
    222   if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
    223     FatalError("Failed to create a socketpair");
    224   }
    225 
    226   for (size_t i = 0;
    227        i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
    228        i++) {
    229     pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);
    230 
    231     if (pid > 0) {
    232       if (!DropRoot()) {
    233         FatalError("Could not drop privileges");
    234       } else {
    235         if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
    236           FatalError("Could not close socketpair");
    237         // The kZygoteIdFd needs to be closed in the parent before
    238         // Zygote gets started.
    239         if (close(kZygoteIdFd))
    240           FatalError("close");
    241         // Tell our child to continue
    242         if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
    243           FatalError("send");
    244         if (close(sync_fds[1]))
    245           FatalError("close");
    246         // We want to keep a full process tree and we don't want our childs to
    247         // be reparented to (the outer PID namespace) init. So we wait for it.
    248         WaitForChildAndExit(pid);
    249       }
    250       // NOTREACHED
    251       FatalError("Not reached");
    252     }
    253 
    254     if (pid == 0) {
    255       if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
    256         FatalError("Could not close socketpair");
    257 
    258       // Wait for the parent to confirm it closed kZygoteIdFd before we
    259       // continue
    260       char should_continue;
    261       if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
    262         FatalError("Read on socketpair");
    263       if (close(sync_fds[0]))
    264         FatalError("close");
    265 
    266       if (kCloneExtraFlags[i] & CLONE_NEWPID) {
    267         setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
    268       } else {
    269         unsetenv(kSandboxPIDNSEnvironmentVarName);
    270       }
    271 
    272       if (kCloneExtraFlags[i] & CLONE_NEWNET) {
    273         setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
    274       } else {
    275         unsetenv(kSandboxNETNSEnvironmentVarName);
    276       }
    277 
    278       break;
    279     }
    280 
    281     if (errno != EINVAL) {
    282       perror("Failed to move to new PID namespace");
    283       return false;
    284     }
    285   }
    286 
    287   // If the system doesn't support NEWPID then we carry on anyway.
    288   return true;
    289 }
    290 
    291 static bool DropRoot() {
    292   if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) {
    293     perror("prctl(PR_SET_DUMPABLE)");
    294     return false;
    295   }
    296 
    297   if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) {
    298     perror("Still dumpable after prctl(PR_SET_DUMPABLE)");
    299     return false;
    300   }
    301 
    302   gid_t rgid, egid, sgid;
    303   if (getresgid(&rgid, &egid, &sgid)) {
    304     perror("getresgid");
    305     return false;
    306   }
    307 
    308   if (setresgid(rgid, rgid, rgid)) {
    309     perror("setresgid");
    310     return false;
    311   }
    312 
    313   uid_t ruid, euid, suid;
    314   if (getresuid(&ruid, &euid, &suid)) {
    315     perror("getresuid");
    316     return false;
    317   }
    318 
    319   if (setresuid(ruid, ruid, ruid)) {
    320     perror("setresuid");
    321     return false;
    322   }
    323 
    324   return true;
    325 }
    326 
    327 static bool SetupChildEnvironment() {
    328   unsigned i;
    329 
    330   // ld.so may have cleared several environment variables because we are SUID.
    331   // However, the child process might need them so zygote_host_linux.cc saves a
    332   // copy in SANDBOX_$x. This is safe because we have dropped root by this
    333   // point, so we can only exec a binary with the permissions of the user who
    334   // ran us in the first place.
    335 
    336   for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
    337     const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
    338     char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
    339     if (!saved_envvar)
    340       return false;
    341 
    342     const char* const value = getenv(saved_envvar);
    343     if (value) {
    344       setenv(envvar, value, 1 /* overwrite */);
    345       unsetenv(saved_envvar);
    346     }
    347 
    348     free(saved_envvar);
    349   }
    350 
    351   return true;
    352 }
    353 
    354 bool CheckAndExportApiVersion() {
    355   // Check the environment to see if a specific API version was requested.
    356   // assume version 0 if none.
    357   long api_number = -1;
    358   char *api_string = getenv(kSandboxEnvironmentApiRequest);
    359   if (!api_string) {
    360     api_number = 0;
    361   } else {
    362     errno = 0;
    363     char* endptr = NULL;
    364     api_number = strtol(api_string, &endptr, 10);
    365     if (!endptr || *endptr || errno != 0)
    366       return false;
    367   }
    368 
    369   // Warn only for now.
    370   if (api_number != kSUIDSandboxApiNumber) {
    371     fprintf(stderr, "The setuid sandbox provides API version %ld, "
    372       "but you need %ld\n"
    373       "Please read "
    374       "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment."
    375       "\n\n",
    376       kSUIDSandboxApiNumber,
    377       api_number);
    378   }
    379 
    380   // Export our version so that the sandboxed process can verify it did not
    381   // use an old sandbox.
    382   char version_string[64];
    383   snprintf(version_string, sizeof(version_string), "%ld",
    384            kSUIDSandboxApiNumber);
    385   if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
    386     perror("setenv");
    387     return false;
    388   }
    389 
    390   return true;
    391 }
    392 
    393 int main(int argc, char **argv) {
    394   if (argc <= 1) {
    395     if (argc <= 0) {
    396       return 1;
    397     }
    398 
    399     fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
    400     return 1;
    401   }
    402 
    403   // Allow someone to query our API version
    404   if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
    405     printf("%ld\n", kSUIDSandboxApiNumber);
    406     return 0;
    407   }
    408 
    409   // In the SUID sandbox, if we succeed in calling MoveToNewNamespaces()
    410   // below, then the zygote and all the renderers are in an alternate PID
    411   // namespace and do not know their real PIDs. As such, they report the wrong
    412   // PIDs to the task manager.
    413   //
    414   // To fix this, when the zygote spawns a new renderer, it gives the renderer
    415   // a dummy socket, which has a unique inode number. Then it asks the sandbox
    416   // host to find the PID of the process holding that fd by searching /proc.
    417   //
    418   // Since the zygote and renderers are all spawned by this setuid executable,
    419   // their entries in /proc are owned by root and only readable by root. In
    420   // order to search /proc for the fd we want, this setuid executable has to
    421   // double as a helper and perform the search. The code block below does this
    422   // when you call it with --find-inode INODE_NUMBER.
    423   if (argc == 3 && (0 == strcmp(argv[1], kFindInodeSwitch))) {
    424     pid_t pid;
    425     char* endptr = NULL;
    426     errno = 0;
    427     ino_t inode = strtoull(argv[2], &endptr, 10);
    428     if (inode == ULLONG_MAX || !endptr || *endptr || errno != 0)
    429       return 1;
    430     if (!FindProcessHoldingSocket(&pid, inode))
    431       return 1;
    432     printf("%d\n", pid);
    433     return 0;
    434   }
    435   // Likewise, we cannot adjust /proc/pid/oom_adj for sandboxed renderers
    436   // because those files are owned by root. So we need another helper here.
    437   if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
    438     char* endptr = NULL;
    439     long score;
    440     errno = 0;
    441     unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
    442     if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
    443       return 1;
    444     pid_t pid = pid_ul;
    445     endptr = NULL;
    446     errno = 0;
    447     score = strtol(argv[3], &endptr, 10);
    448     if (score == LONG_MAX || score == LONG_MIN ||
    449         !endptr || *endptr || errno != 0)
    450       return 1;
    451     return AdjustOOMScore(pid, score);
    452   }
    453 #if defined(OS_CHROMEOS)
    454   if (argc == 3 && (0 == strcmp(argv[1], kAdjustLowMemMarginSwitch))) {
    455     char* endptr = NULL;
    456     errno = 0;
    457     unsigned long margin_mb = strtoul(argv[2], &endptr, 10);
    458     if (!endptr || *endptr || errno != 0)
    459       return 1;
    460     return AdjustLowMemoryMargin(margin_mb);
    461   }
    462 #endif
    463 
    464   // Protect the core setuid sandbox functionality with an API version
    465   if (!CheckAndExportApiVersion()) {
    466     return 1;
    467   }
    468 
    469   if (!MoveToNewNamespaces())
    470     return 1;
    471   if (!SpawnChrootHelper())
    472     return 1;
    473   if (!DropRoot())
    474     return 1;
    475   if (!SetupChildEnvironment())
    476     return 1;
    477 
    478   execv(argv[1], &argv[1]);
    479   FatalError("execv failed");
    480 
    481   return 1;
    482 }
    483