1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox 6 7 #include "sandbox/linux/suid/common/sandbox.h" 8 9 #define _GNU_SOURCE 10 #include <asm/unistd.h> 11 #include <errno.h> 12 #include <fcntl.h> 13 #include <limits.h> 14 #include <sched.h> 15 #include <signal.h> 16 #include <stdarg.h> 17 #include <stdbool.h> 18 #include <stdint.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include <sys/prctl.h> 23 #include <sys/resource.h> 24 #include <sys/socket.h> 25 #include <sys/stat.h> 26 #include <sys/time.h> 27 #include <sys/types.h> 28 #include <sys/vfs.h> 29 #include <sys/wait.h> 30 #include <unistd.h> 31 32 #include "sandbox/linux/suid/common/suid_unsafe_environment_variables.h" 33 #include "sandbox/linux/suid/process_util.h" 34 35 #if !defined(CLONE_NEWPID) 36 #define CLONE_NEWPID 0x20000000 37 #endif 38 #if !defined(CLONE_NEWNET) 39 #define CLONE_NEWNET 0x40000000 40 #endif 41 42 static bool DropRoot(); 43 44 #define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x) 45 46 static void FatalError(const char* msg, ...) 47 __attribute__((noreturn, format(printf, 1, 2))); 48 49 static void FatalError(const char* msg, ...) { 50 va_list ap; 51 va_start(ap, msg); 52 53 vfprintf(stderr, msg, ap); 54 fprintf(stderr, ": %s\n", strerror(errno)); 55 fflush(stderr); 56 va_end(ap); 57 _exit(1); 58 } 59 60 static void ExitWithErrorSignalHandler(int signal) { 61 const char msg[] = "\nThe setuid sandbox got signaled, exiting.\n"; 62 if (-1 == write(2, msg, sizeof(msg) - 1)) { 63 // Do nothing. 64 } 65 66 _exit(1); 67 } 68 69 // We will chroot() to the helper's /proc/self directory. Anything there will 70 // not exist anymore if we make sure to wait() for the helper. 71 // 72 // /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty 73 // even if the helper survives as a zombie. 74 // 75 // There is very little reason to use fdinfo/ instead of fd/ but we are 76 // paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/ 77 #define SAFE_DIR "/proc/self/fdinfo" 78 #define SAFE_DIR2 "/proc/self/fd" 79 80 static bool SpawnChrootHelper() { 81 int sv[2]; 82 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 83 perror("socketpair"); 84 return false; 85 } 86 87 char* safedir = NULL; 88 struct stat sdir_stat; 89 if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) { 90 safedir = SAFE_DIR; 91 } else if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) { 92 safedir = SAFE_DIR2; 93 } else { 94 fprintf(stderr, "Could not find %s\n", SAFE_DIR2); 95 return false; 96 } 97 98 const pid_t pid = syscall(__NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0); 99 100 if (pid == -1) { 101 perror("clone"); 102 close(sv[0]); 103 close(sv[1]); 104 return false; 105 } 106 107 if (pid == 0) { 108 // We share our files structure with an untrusted process. As a security in 109 // depth measure, we make sure that we can't open anything by mistake. 110 // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT 111 112 const struct rlimit nofile = {0, 0}; 113 if (setrlimit(RLIMIT_NOFILE, &nofile)) 114 FatalError("Setting RLIMIT_NOFILE"); 115 116 if (close(sv[1])) 117 FatalError("close"); 118 119 // wait for message 120 char msg; 121 ssize_t bytes; 122 do { 123 bytes = read(sv[0], &msg, 1); 124 } while (bytes == -1 && errno == EINTR); 125 126 if (bytes == 0) 127 _exit(0); 128 if (bytes != 1) 129 FatalError("read"); 130 131 // do chrooting 132 if (msg != kMsgChrootMe) 133 FatalError("Unknown message from sandboxed process"); 134 135 // sanity check 136 if (chdir(safedir)) 137 FatalError("Cannot chdir into /proc/ directory"); 138 139 if (chroot(safedir)) 140 FatalError("Cannot chroot into /proc/ directory"); 141 142 if (chdir("/")) 143 FatalError("Cannot chdir to / after chroot"); 144 145 const char reply = kMsgChrootSuccessful; 146 do { 147 bytes = write(sv[0], &reply, 1); 148 } while (bytes == -1 && errno == EINTR); 149 150 if (bytes != 1) 151 FatalError("Writing reply"); 152 153 _exit(0); 154 // We now become a zombie. /proc/self/fd(info) is now an empty dir and we 155 // are chrooted there. 156 // Our (unprivileged) parent should not even be able to open "." or "/" 157 // since they would need to pass the ptrace() check. If our parent wait() 158 // for us, our root directory will completely disappear. 159 } 160 161 if (close(sv[0])) { 162 close(sv[1]); 163 perror("close"); 164 return false; 165 } 166 167 // In the parent process, we install an environment variable containing the 168 // number of the file descriptor. 169 char desc_str[64]; 170 int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]); 171 if (printed < 0 || printed >= (int)sizeof(desc_str)) { 172 fprintf(stderr, "Failed to snprintf\n"); 173 return false; 174 } 175 176 if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) { 177 perror("setenv"); 178 close(sv[1]); 179 return false; 180 } 181 182 // We also install an environment variable containing the pid of the child 183 char helper_pid_str[64]; 184 printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid); 185 if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) { 186 fprintf(stderr, "Failed to snprintf\n"); 187 return false; 188 } 189 190 if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) { 191 perror("setenv"); 192 close(sv[1]); 193 return false; 194 } 195 196 return true; 197 } 198 199 // Block until child_pid exits, then exit. Try to preserve the exit code. 200 static void WaitForChildAndExit(pid_t child_pid) { 201 int exit_code = -1; 202 siginfo_t reaped_child_info; 203 204 // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager 205 // when things are hanging. 206 // Here, the current process is going to waitid() and _exit(), so there is no 207 // point in generating a crash report. The child process is the one 208 // blocking us. 209 if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) { 210 FatalError("Failed to change signal handler"); 211 } 212 213 int wait_ret = 214 HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED)); 215 216 if (!wait_ret && reaped_child_info.si_pid == child_pid) { 217 if (reaped_child_info.si_code == CLD_EXITED) { 218 exit_code = reaped_child_info.si_status; 219 } else { 220 // Exit with code 0 if the child got signaled. 221 exit_code = 0; 222 } 223 } 224 _exit(exit_code); 225 } 226 227 static bool MoveToNewNamespaces() { 228 // These are the sets of flags which we'll try, in order. 229 const int kCloneExtraFlags[] = {CLONE_NEWPID | CLONE_NEWNET, CLONE_NEWPID, }; 230 231 // We need to close kZygoteIdFd before the child can continue. We use this 232 // socketpair to tell the child when to continue; 233 int sync_fds[2]; 234 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) { 235 FatalError("Failed to create a socketpair"); 236 } 237 238 for (size_t i = 0; i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]); 239 i++) { 240 pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0); 241 const int clone_errno = errno; 242 243 if (pid > 0) { 244 if (!DropRoot()) { 245 FatalError("Could not drop privileges"); 246 } else { 247 if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD)) 248 FatalError("Could not close socketpair"); 249 // The kZygoteIdFd needs to be closed in the parent before 250 // Zygote gets started. 251 if (close(kZygoteIdFd)) 252 FatalError("close"); 253 // Tell our child to continue 254 if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1) 255 FatalError("send"); 256 if (close(sync_fds[1])) 257 FatalError("close"); 258 // We want to keep a full process tree and we don't want our childs to 259 // be reparented to (the outer PID namespace) init. So we wait for it. 260 WaitForChildAndExit(pid); 261 } 262 // NOTREACHED 263 FatalError("Not reached"); 264 } 265 266 if (pid == 0) { 267 if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR)) 268 FatalError("Could not close socketpair"); 269 270 // Wait for the parent to confirm it closed kZygoteIdFd before we 271 // continue 272 char should_continue; 273 if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1) 274 FatalError("Read on socketpair"); 275 if (close(sync_fds[0])) 276 FatalError("close"); 277 278 if (kCloneExtraFlags[i] & CLONE_NEWPID) { 279 setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */); 280 } else { 281 unsetenv(kSandboxPIDNSEnvironmentVarName); 282 } 283 284 if (kCloneExtraFlags[i] & CLONE_NEWNET) { 285 setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */); 286 } else { 287 unsetenv(kSandboxNETNSEnvironmentVarName); 288 } 289 290 break; 291 } 292 293 // If EINVAL then the system doesn't support the requested flags, so 294 // continue to try a different set. 295 // On any other errno value the system *does* support these flags but 296 // something went wrong, hence we bail with an error message rather then 297 // provide less security. 298 if (errno != EINVAL) { 299 fprintf(stderr, "Failed to move to new namespace:"); 300 if (kCloneExtraFlags[i] & CLONE_NEWPID) { 301 fprintf(stderr, " PID namespaces supported,"); 302 } 303 if (kCloneExtraFlags[i] & CLONE_NEWNET) { 304 fprintf(stderr, " Network namespace supported,"); 305 } 306 fprintf(stderr, " but failed: errno = %s\n", strerror(clone_errno)); 307 return false; 308 } 309 } 310 311 // If the system doesn't support NEWPID then we carry on anyway. 312 return true; 313 } 314 315 static bool DropRoot() { 316 if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) { 317 perror("prctl(PR_SET_DUMPABLE)"); 318 return false; 319 } 320 321 if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) { 322 perror("Still dumpable after prctl(PR_SET_DUMPABLE)"); 323 return false; 324 } 325 326 gid_t rgid, egid, sgid; 327 if (getresgid(&rgid, &egid, &sgid)) { 328 perror("getresgid"); 329 return false; 330 } 331 332 if (setresgid(rgid, rgid, rgid)) { 333 perror("setresgid"); 334 return false; 335 } 336 337 uid_t ruid, euid, suid; 338 if (getresuid(&ruid, &euid, &suid)) { 339 perror("getresuid"); 340 return false; 341 } 342 343 if (setresuid(ruid, ruid, ruid)) { 344 perror("setresuid"); 345 return false; 346 } 347 348 return true; 349 } 350 351 static bool SetupChildEnvironment() { 352 unsigned i; 353 354 // ld.so may have cleared several environment variables because we are SUID. 355 // However, the child process might need them so zygote_host_linux.cc saves a 356 // copy in SANDBOX_$x. This is safe because we have dropped root by this 357 // point, so we can only exec a binary with the permissions of the user who 358 // ran us in the first place. 359 360 for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) { 361 const char* const envvar = kSUIDUnsafeEnvironmentVariables[i]; 362 char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar); 363 if (!saved_envvar) 364 return false; 365 366 const char* const value = getenv(saved_envvar); 367 if (value) { 368 setenv(envvar, value, 1 /* overwrite */); 369 unsetenv(saved_envvar); 370 } 371 372 free(saved_envvar); 373 } 374 375 return true; 376 } 377 378 bool CheckAndExportApiVersion() { 379 // Check the environment to see if a specific API version was requested. 380 // assume version 0 if none. 381 long api_number = -1; 382 char* api_string = getenv(kSandboxEnvironmentApiRequest); 383 if (!api_string) { 384 api_number = 0; 385 } else { 386 errno = 0; 387 char* endptr = NULL; 388 api_number = strtol(api_string, &endptr, 10); 389 if (!endptr || *endptr || errno != 0) 390 return false; 391 } 392 393 // Warn only for now. 394 if (api_number != kSUIDSandboxApiNumber) { 395 fprintf( 396 stderr, 397 "The setuid sandbox provides API version %ld, " 398 "but you need %ld\n" 399 "Please read " 400 "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment." 401 "\n\n", 402 kSUIDSandboxApiNumber, 403 api_number); 404 } 405 406 // Export our version so that the sandboxed process can verify it did not 407 // use an old sandbox. 408 char version_string[64]; 409 snprintf( 410 version_string, sizeof(version_string), "%ld", kSUIDSandboxApiNumber); 411 if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) { 412 perror("setenv"); 413 return false; 414 } 415 416 return true; 417 } 418 419 int main(int argc, char** argv) { 420 if (argc <= 1) { 421 if (argc <= 0) { 422 return 1; 423 } 424 425 fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]); 426 return 1; 427 } 428 429 // Allow someone to query our API version 430 if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) { 431 printf("%ld\n", kSUIDSandboxApiNumber); 432 return 0; 433 } 434 435 // We cannot adjust /proc/pid/oom_adj for sandboxed renderers 436 // because those files are owned by root. So we need a helper here. 437 if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) { 438 char* endptr = NULL; 439 long score; 440 errno = 0; 441 unsigned long pid_ul = strtoul(argv[2], &endptr, 10); 442 if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0) 443 return 1; 444 pid_t pid = pid_ul; 445 endptr = NULL; 446 errno = 0; 447 score = strtol(argv[3], &endptr, 10); 448 if (score == LONG_MAX || score == LONG_MIN || !endptr || *endptr || 449 errno != 0) { 450 return 1; 451 } 452 return AdjustOOMScore(pid, score); 453 } 454 455 // Protect the core setuid sandbox functionality with an API version 456 if (!CheckAndExportApiVersion()) { 457 return 1; 458 } 459 460 if (geteuid() != 0) { 461 fprintf(stderr, 462 "The setuid sandbox is not running as root. Common causes:\n" 463 " * An unprivileged process using ptrace on it, like a debugger.\n" 464 " * A parent process set prctl(PR_SET_NO_NEW_PRIVS, ...)\n"); 465 } 466 467 if (!MoveToNewNamespaces()) 468 return 1; 469 if (!SpawnChrootHelper()) 470 return 1; 471 if (!DropRoot()) 472 return 1; 473 if (!SetupChildEnvironment()) 474 return 1; 475 476 execv(argv[1], &argv[1]); 477 FatalError("execv failed"); 478 479 return 1; 480 } 481