1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // https://chromium.googlesource.com/chromium/src/+/master/docs/linux_suid_sandbox.md 6 7 #include "sandbox/linux/suid/common/sandbox.h" 8 9 #define _GNU_SOURCE 10 #include <asm/unistd.h> 11 #include <errno.h> 12 #include <fcntl.h> 13 #include <limits.h> 14 #include <sched.h> 15 #include <signal.h> 16 #include <stdarg.h> 17 #include <stdbool.h> 18 #include <stddef.h> 19 #include <stdint.h> 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <sys/prctl.h> 24 #include <sys/resource.h> 25 #include <sys/socket.h> 26 #include <sys/stat.h> 27 #include <sys/time.h> 28 #include <sys/types.h> 29 #include <sys/vfs.h> 30 #include <sys/wait.h> 31 #include <unistd.h> 32 33 #include "sandbox/linux/suid/common/suid_unsafe_environment_variables.h" 34 #include "sandbox/linux/suid/process_util.h" 35 36 #if !defined(CLONE_NEWPID) 37 #define CLONE_NEWPID 0x20000000 38 #endif 39 #if !defined(CLONE_NEWNET) 40 #define CLONE_NEWNET 0x40000000 41 #endif 42 43 static bool DropRoot(); 44 45 #define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x) 46 47 static void FatalError(const char* msg, ...) 48 __attribute__((noreturn, format(printf, 1, 2))); 49 50 static void FatalError(const char* msg, ...) { 51 va_list ap; 52 va_start(ap, msg); 53 54 vfprintf(stderr, msg, ap); 55 fprintf(stderr, ": %s\n", strerror(errno)); 56 fflush(stderr); 57 va_end(ap); 58 _exit(1); 59 } 60 61 static void ExitWithErrorSignalHandler(int signal) { 62 const char msg[] = "\nThe setuid sandbox got signaled, exiting.\n"; 63 if (-1 == write(2, msg, sizeof(msg) - 1)) { 64 // Do nothing. 65 } 66 67 _exit(1); 68 } 69 70 // We will chroot() to the helper's /proc/self directory. Anything there will 71 // not exist anymore if we make sure to wait() for the helper. 72 // 73 // /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty 74 // even if the helper survives as a zombie. 75 // 76 // There is very little reason to use fdinfo/ instead of fd/ but we are 77 // paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/ 78 #define SAFE_DIR "/proc/self/fdinfo" 79 #define SAFE_DIR2 "/proc/self/fd" 80 81 static bool SpawnChrootHelper() { 82 int sv[2]; 83 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 84 perror("socketpair"); 85 return false; 86 } 87 88 char* safedir = NULL; 89 struct stat sdir_stat; 90 if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) { 91 safedir = SAFE_DIR; 92 } else if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) { 93 safedir = SAFE_DIR2; 94 } else { 95 fprintf(stderr, "Could not find %s\n", SAFE_DIR2); 96 return false; 97 } 98 99 const pid_t pid = syscall(__NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0); 100 101 if (pid == -1) { 102 perror("clone"); 103 close(sv[0]); 104 close(sv[1]); 105 return false; 106 } 107 108 if (pid == 0) { 109 // We share our files structure with an untrusted process. As a security in 110 // depth measure, we make sure that we can't open anything by mistake. 111 // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT 112 113 const struct rlimit nofile = {0, 0}; 114 if (setrlimit(RLIMIT_NOFILE, &nofile)) 115 FatalError("Setting RLIMIT_NOFILE"); 116 117 if (close(sv[1])) 118 FatalError("close"); 119 120 // wait for message 121 char msg; 122 ssize_t bytes; 123 do { 124 bytes = read(sv[0], &msg, 1); 125 } while (bytes == -1 && errno == EINTR); 126 127 if (bytes == 0) 128 _exit(0); 129 if (bytes != 1) 130 FatalError("read"); 131 132 // do chrooting 133 if (msg != kMsgChrootMe) 134 FatalError("Unknown message from sandboxed process"); 135 136 // sanity check 137 if (chdir(safedir)) 138 FatalError("Cannot chdir into /proc/ directory"); 139 140 if (chroot(safedir)) 141 FatalError("Cannot chroot into /proc/ directory"); 142 143 if (chdir("/")) 144 FatalError("Cannot chdir to / after chroot"); 145 146 const char reply = kMsgChrootSuccessful; 147 do { 148 bytes = write(sv[0], &reply, 1); 149 } while (bytes == -1 && errno == EINTR); 150 151 if (bytes != 1) 152 FatalError("Writing reply"); 153 154 _exit(0); 155 // We now become a zombie. /proc/self/fd(info) is now an empty dir and we 156 // are chrooted there. 157 // Our (unprivileged) parent should not even be able to open "." or "/" 158 // since they would need to pass the ptrace() check. If our parent wait() 159 // for us, our root directory will completely disappear. 160 } 161 162 if (close(sv[0])) { 163 close(sv[1]); 164 perror("close"); 165 return false; 166 } 167 168 // In the parent process, we install an environment variable containing the 169 // number of the file descriptor. 170 char desc_str[64]; 171 int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]); 172 if (printed < 0 || printed >= (int)sizeof(desc_str)) { 173 fprintf(stderr, "Failed to snprintf\n"); 174 return false; 175 } 176 177 if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) { 178 perror("setenv"); 179 close(sv[1]); 180 return false; 181 } 182 183 // We also install an environment variable containing the pid of the child 184 char helper_pid_str[64]; 185 printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid); 186 if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) { 187 fprintf(stderr, "Failed to snprintf\n"); 188 return false; 189 } 190 191 if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) { 192 perror("setenv"); 193 close(sv[1]); 194 return false; 195 } 196 197 return true; 198 } 199 200 // Block until child_pid exits, then exit. Try to preserve the exit code. 201 static void WaitForChildAndExit(pid_t child_pid) { 202 int exit_code = -1; 203 siginfo_t reaped_child_info; 204 205 // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager 206 // when things are hanging. 207 // Here, the current process is going to waitid() and _exit(), so there is no 208 // point in generating a crash report. The child process is the one 209 // blocking us. 210 if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) { 211 FatalError("Failed to change signal handler"); 212 } 213 214 int wait_ret = 215 HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED)); 216 217 if (!wait_ret && reaped_child_info.si_pid == child_pid) { 218 if (reaped_child_info.si_code == CLD_EXITED) { 219 exit_code = reaped_child_info.si_status; 220 } else { 221 // Exit with code 0 if the child got signaled. 222 exit_code = 0; 223 } 224 } 225 _exit(exit_code); 226 } 227 228 static bool MoveToNewNamespaces() { 229 // These are the sets of flags which we'll try, in order. 230 const int kCloneExtraFlags[] = {CLONE_NEWPID | CLONE_NEWNET, CLONE_NEWPID, }; 231 232 // We need to close kZygoteIdFd before the child can continue. We use this 233 // socketpair to tell the child when to continue; 234 int sync_fds[2]; 235 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) { 236 FatalError("Failed to create a socketpair"); 237 } 238 239 for (size_t i = 0; i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]); 240 i++) { 241 pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0); 242 const int clone_errno = errno; 243 244 if (pid > 0) { 245 if (!DropRoot()) { 246 FatalError("Could not drop privileges"); 247 } else { 248 if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD)) 249 FatalError("Could not close socketpair"); 250 // The kZygoteIdFd needs to be closed in the parent before 251 // Zygote gets started. 252 if (close(kZygoteIdFd)) 253 FatalError("close"); 254 // Tell our child to continue 255 if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1) 256 FatalError("send"); 257 if (close(sync_fds[1])) 258 FatalError("close"); 259 // We want to keep a full process tree and we don't want our childs to 260 // be reparented to (the outer PID namespace) init. So we wait for it. 261 WaitForChildAndExit(pid); 262 } 263 // NOTREACHED 264 FatalError("Not reached"); 265 } 266 267 if (pid == 0) { 268 if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR)) 269 FatalError("Could not close socketpair"); 270 271 // Wait for the parent to confirm it closed kZygoteIdFd before we 272 // continue 273 char should_continue; 274 if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1) 275 FatalError("Read on socketpair"); 276 if (close(sync_fds[0])) 277 FatalError("close"); 278 279 if (kCloneExtraFlags[i] & CLONE_NEWPID) { 280 setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */); 281 } else { 282 unsetenv(kSandboxPIDNSEnvironmentVarName); 283 } 284 285 if (kCloneExtraFlags[i] & CLONE_NEWNET) { 286 setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */); 287 } else { 288 unsetenv(kSandboxNETNSEnvironmentVarName); 289 } 290 291 break; 292 } 293 294 // If EINVAL then the system doesn't support the requested flags, so 295 // continue to try a different set. 296 // On any other errno value the system *does* support these flags but 297 // something went wrong, hence we bail with an error message rather then 298 // provide less security. 299 if (errno != EINVAL) { 300 fprintf(stderr, "Failed to move to new namespace:"); 301 if (kCloneExtraFlags[i] & CLONE_NEWPID) { 302 fprintf(stderr, " PID namespaces supported,"); 303 } 304 if (kCloneExtraFlags[i] & CLONE_NEWNET) { 305 fprintf(stderr, " Network namespace supported,"); 306 } 307 fprintf(stderr, " but failed: errno = %s\n", strerror(clone_errno)); 308 return false; 309 } 310 } 311 312 // If the system doesn't support NEWPID then we carry on anyway. 313 return true; 314 } 315 316 static bool DropRoot() { 317 if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) { 318 perror("prctl(PR_SET_DUMPABLE)"); 319 return false; 320 } 321 322 if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) { 323 perror("Still dumpable after prctl(PR_SET_DUMPABLE)"); 324 return false; 325 } 326 327 gid_t rgid, egid, sgid; 328 if (getresgid(&rgid, &egid, &sgid)) { 329 perror("getresgid"); 330 return false; 331 } 332 333 if (setresgid(rgid, rgid, rgid)) { 334 perror("setresgid"); 335 return false; 336 } 337 338 uid_t ruid, euid, suid; 339 if (getresuid(&ruid, &euid, &suid)) { 340 perror("getresuid"); 341 return false; 342 } 343 344 if (setresuid(ruid, ruid, ruid)) { 345 perror("setresuid"); 346 return false; 347 } 348 349 return true; 350 } 351 352 static bool SetupChildEnvironment() { 353 unsigned i; 354 355 // ld.so may have cleared several environment variables because we are SUID. 356 // However, the child process might need them so zygote_host_linux.cc saves a 357 // copy in SANDBOX_$x. This is safe because we have dropped root by this 358 // point, so we can only exec a binary with the permissions of the user who 359 // ran us in the first place. 360 361 for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) { 362 const char* const envvar = kSUIDUnsafeEnvironmentVariables[i]; 363 char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar); 364 if (!saved_envvar) 365 return false; 366 367 const char* const value = getenv(saved_envvar); 368 if (value) { 369 setenv(envvar, value, 1 /* overwrite */); 370 unsetenv(saved_envvar); 371 } 372 373 free(saved_envvar); 374 } 375 376 return true; 377 } 378 379 bool CheckAndExportApiVersion() { 380 // Check the environment to see if a specific API version was requested. 381 // assume version 0 if none. 382 int api_number = -1; 383 char* api_string = getenv(kSandboxEnvironmentApiRequest); 384 if (!api_string) { 385 api_number = 0; 386 } else { 387 errno = 0; 388 char* endptr = NULL; 389 long long_api_number = strtol(api_string, &endptr, 10); 390 if (!endptr || *endptr || errno != 0 || long_api_number < INT_MIN || 391 long_api_number > INT_MAX) { 392 return false; 393 } 394 api_number = long_api_number; 395 } 396 397 // Warn only for now. 398 if (api_number != kSUIDSandboxApiNumber) { 399 fprintf( 400 stderr, 401 "The setuid sandbox provides API version %d, " 402 "but you need %d\n" 403 "Please read " 404 "https://chromium.googlesource.com/chromium/src/+/master/docs/linux_suid_sandbox_development.md." 405 "\n\n", 406 kSUIDSandboxApiNumber, 407 api_number); 408 } 409 410 // Export our version so that the sandboxed process can verify it did not 411 // use an old sandbox. 412 char version_string[64]; 413 snprintf(version_string, sizeof(version_string), "%d", kSUIDSandboxApiNumber); 414 if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) { 415 perror("setenv"); 416 return false; 417 } 418 419 return true; 420 } 421 422 int main(int argc, char** argv) { 423 if (argc <= 1) { 424 if (argc <= 0) { 425 return 1; 426 } 427 428 fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]); 429 return 1; 430 } 431 432 // Allow someone to query our API version 433 if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) { 434 printf("%d\n", kSUIDSandboxApiNumber); 435 return 0; 436 } 437 438 // We cannot adjust /proc/pid/oom_adj for sandboxed renderers 439 // because those files are owned by root. So we need a helper here. 440 if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) { 441 char* endptr = NULL; 442 long score; 443 errno = 0; 444 unsigned long pid_ul = strtoul(argv[2], &endptr, 10); 445 if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0) 446 return 1; 447 pid_t pid = pid_ul; 448 endptr = NULL; 449 errno = 0; 450 score = strtol(argv[3], &endptr, 10); 451 if (score == LONG_MAX || score == LONG_MIN || !endptr || *endptr || 452 errno != 0) { 453 return 1; 454 } 455 return AdjustOOMScore(pid, score); 456 } 457 458 // Protect the core setuid sandbox functionality with an API version 459 if (!CheckAndExportApiVersion()) { 460 return 1; 461 } 462 463 if (geteuid() != 0) { 464 fprintf(stderr, 465 "The setuid sandbox is not running as root. Common causes:\n" 466 " * An unprivileged process using ptrace on it, like a debugger.\n" 467 " * A parent process set prctl(PR_SET_NO_NEW_PRIVS, ...)\n"); 468 } 469 470 if (!MoveToNewNamespaces()) 471 return 1; 472 if (!SpawnChrootHelper()) 473 return 1; 474 if (!DropRoot()) 475 return 1; 476 if (!SetupChildEnvironment()) 477 return 1; 478 479 execv(argv[1], &argv[1]); 480 FatalError("execv failed"); 481 482 return 1; 483 } 484