1 /* Copyright 2017 The Chromium OS Authors. All rights reserved. 2 * Use of this source code is governed by a BSD-style license that can be 3 * found in the LICENSE file. 4 */ 5 6 #include "system.h" 7 8 #include <errno.h> 9 #include <fcntl.h> 10 #include <grp.h> 11 #include <net/if.h> 12 #include <pwd.h> 13 #include <stdbool.h> 14 #include <stdio.h> 15 #include <string.h> 16 #include <sys/ioctl.h> 17 #include <sys/prctl.h> 18 #include <sys/socket.h> 19 #include <sys/stat.h> 20 #include <sys/statvfs.h> 21 #include <unistd.h> 22 23 #include <linux/securebits.h> 24 25 #include "util.h" 26 27 /* 28 * SECBIT_NO_CAP_AMBIENT_RAISE was added in kernel 4.3, so fill in the 29 * definition if the securebits header doesn't provide it. 30 */ 31 #ifndef SECBIT_NO_CAP_AMBIENT_RAISE 32 #define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(6)) 33 #endif 34 35 #ifndef SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED 36 #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(7)) 37 #endif 38 39 /* 40 * Assert the value of SECURE_ALL_BITS at compile-time. 41 * Android devices are currently compiled against 4.4 kernel headers. Kernel 4.3 42 * added a new securebit. 43 * When a new securebit is added, the new SECURE_ALL_BITS mask will return EPERM 44 * when used on older kernels. The compile-time assert will catch this situation 45 * at compile time. 46 */ 47 #if defined(__ANDROID__) 48 _Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55."); 49 #endif 50 51 int secure_noroot_set_and_locked(uint64_t mask) 52 { 53 return (mask & (SECBIT_NOROOT | SECBIT_NOROOT_LOCKED)) == 54 (SECBIT_NOROOT | SECBIT_NOROOT_LOCKED); 55 } 56 57 int lock_securebits(uint64_t skip_mask, bool require_keep_caps) 58 { 59 /* The general idea is to set all bits, subject to exceptions below. */ 60 unsigned long securebits = SECURE_ALL_BITS | SECURE_ALL_LOCKS; 61 62 /* 63 * SECBIT_KEEP_CAPS is special in that it is automatically cleared on 64 * execve(2). This implies that attempts to set SECBIT_KEEP_CAPS (as is 65 * the default) in processes that have it locked already (such as nested 66 * minijail usage) would fail. Thus, unless the caller requires it, 67 * allow it to remain off if it is already locked. 68 */ 69 if (!require_keep_caps) { 70 int current_securebits = prctl(PR_GET_SECUREBITS); 71 if (current_securebits < 0) { 72 pwarn("prctl(PR_GET_SECUREBITS) failed"); 73 return -1; 74 } 75 76 if ((current_securebits & SECBIT_KEEP_CAPS_LOCKED) != 0 && 77 (current_securebits & SECBIT_KEEP_CAPS) == 0) { 78 securebits &= ~SECBIT_KEEP_CAPS; 79 } 80 } 81 82 /* 83 * Ambient capabilities can only be raised if they're already present 84 * in the permitted *and* inheritable set. Therefore, we don't really 85 * need to lock the NO_CAP_AMBIENT_RAISE securebit, since we are already 86 * configuring the permitted and inheritable set. 87 */ 88 securebits &= 89 ~(SECBIT_NO_CAP_AMBIENT_RAISE | SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED); 90 91 /* Don't set any bits that the user requested not to be touched. */ 92 securebits &= ~skip_mask; 93 94 if (!securebits) { 95 warn("not locking any securebits"); 96 return 0; 97 } 98 int securebits_ret = prctl(PR_SET_SECUREBITS, securebits); 99 if (securebits_ret < 0) { 100 pwarn("prctl(PR_SET_SECUREBITS) failed"); 101 return -1; 102 } 103 104 return 0; 105 } 106 107 int write_proc_file(pid_t pid, const char *content, const char *basename) 108 { 109 int fd, ret; 110 size_t sz, len; 111 ssize_t written; 112 char filename[32]; 113 114 sz = sizeof(filename); 115 ret = snprintf(filename, sz, "/proc/%d/%s", pid, basename); 116 if (ret < 0 || (size_t)ret >= sz) { 117 warn("failed to generate %s filename", basename); 118 return -1; 119 } 120 121 fd = open(filename, O_WRONLY | O_CLOEXEC); 122 if (fd < 0) { 123 pwarn("failed to open '%s'", filename); 124 return -errno; 125 } 126 127 len = strlen(content); 128 written = write(fd, content, len); 129 if (written < 0) { 130 pwarn("failed to write '%s'", filename); 131 return -errno; 132 } 133 134 if ((size_t)written < len) { 135 warn("failed to write %zu bytes to '%s'", len, filename); 136 return -1; 137 } 138 close(fd); 139 return 0; 140 } 141 142 /* 143 * We specifically do not use cap_valid() as that only tells us the last 144 * valid cap we were *compiled* against (i.e. what the version of kernel 145 * headers says). If we run on a different kernel version, then it's not 146 * uncommon for that to be less (if an older kernel) or more (if a newer 147 * kernel). 148 * Normally, we suck up the answer via /proc. On Android, not all processes are 149 * guaranteed to be able to access '/proc/sys/kernel/cap_last_cap' so we 150 * programmatically find the value by calling prctl(PR_CAPBSET_READ). 151 */ 152 unsigned int get_last_valid_cap(void) 153 { 154 unsigned int last_valid_cap = 0; 155 if (is_android()) { 156 for (; prctl(PR_CAPBSET_READ, last_valid_cap, 0, 0, 0) >= 0; 157 ++last_valid_cap) 158 ; 159 160 /* |last_valid_cap| will be the first failing value. */ 161 if (last_valid_cap > 0) { 162 last_valid_cap--; 163 } 164 } else { 165 const char cap_file[] = "/proc/sys/kernel/cap_last_cap"; 166 FILE *fp = fopen(cap_file, "re"); 167 if (fscanf(fp, "%u", &last_valid_cap) != 1) 168 pdie("fscanf(%s)", cap_file); 169 fclose(fp); 170 } 171 return last_valid_cap; 172 } 173 174 int cap_ambient_supported(void) 175 { 176 return prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) >= 177 0; 178 } 179 180 int config_net_loopback(void) 181 { 182 const char ifname[] = "lo"; 183 int sock; 184 struct ifreq ifr; 185 186 /* Make sure people don't try to add really long names. */ 187 _Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long"); 188 189 sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); 190 if (sock < 0) { 191 pwarn("socket(AF_LOCAL) failed"); 192 return -1; 193 } 194 195 /* 196 * Do the equiv of `ip link set up lo`. The kernel will assign 197 * IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically! 198 */ 199 strcpy(ifr.ifr_name, ifname); 200 if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) { 201 pwarn("ioctl(SIOCGIFFLAGS) failed"); 202 return -1; 203 } 204 205 /* The kernel preserves ifr.ifr_name for use. */ 206 ifr.ifr_flags |= IFF_UP | IFF_RUNNING; 207 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) { 208 pwarn("ioctl(SIOCSIFFLAGS) failed"); 209 return -1; 210 } 211 212 close(sock); 213 return 0; 214 } 215 216 int setup_pipe_end(int fds[2], size_t index) 217 { 218 if (index > 1) 219 return -1; 220 221 close(fds[1 - index]); 222 return fds[index]; 223 } 224 225 int setup_and_dupe_pipe_end(int fds[2], size_t index, int fd) 226 { 227 if (index > 1) 228 return -1; 229 230 close(fds[1 - index]); 231 /* dup2(2) the corresponding end of the pipe into |fd|. */ 232 return dup2(fds[index], fd); 233 } 234 235 int write_pid_to_path(pid_t pid, const char *path) 236 { 237 FILE *fp = fopen(path, "we"); 238 239 if (!fp) { 240 pwarn("failed to open '%s'", path); 241 return -errno; 242 } 243 if (fprintf(fp, "%d\n", (int)pid) < 0) { 244 /* fprintf(3) does not set errno on failure. */ 245 warn("fprintf(%s) failed", path); 246 return -1; 247 } 248 if (fclose(fp)) { 249 pwarn("fclose(%s) failed", path); 250 return -errno; 251 } 252 253 return 0; 254 } 255 256 /* 257 * Create the |path| directory and its parents (if need be) with |mode|. 258 * If not |isdir|, then |path| is actually a file, so the last component 259 * will not be created. 260 */ 261 int mkdir_p(const char *path, mode_t mode, bool isdir) 262 { 263 int rc; 264 char *dir = strdup(path); 265 if (!dir) { 266 rc = errno; 267 pwarn("strdup(%s) failed", path); 268 return -rc; 269 } 270 271 /* Starting from the root, work our way out to the end. */ 272 char *p = strchr(dir + 1, '/'); 273 while (p) { 274 *p = '\0'; 275 if (mkdir(dir, mode) && errno != EEXIST) { 276 rc = errno; 277 pwarn("mkdir(%s, 0%o) failed", dir, mode); 278 free(dir); 279 return -rc; 280 } 281 *p = '/'; 282 p = strchr(p + 1, '/'); 283 } 284 285 /* 286 * Create the last directory. We still check EEXIST here in case 287 * of trailing slashes. 288 */ 289 free(dir); 290 if (isdir && mkdir(path, mode) && errno != EEXIST) { 291 rc = errno; 292 pwarn("mkdir(%s, 0%o) failed", path, mode); 293 return -rc; 294 } 295 return 0; 296 } 297 298 /* 299 * setup_mount_destination: Ensures the mount target exists. 300 * Creates it if needed and possible. 301 */ 302 int setup_mount_destination(const char *source, const char *dest, uid_t uid, 303 uid_t gid, bool bind, unsigned long *mnt_flags) 304 { 305 int rc; 306 struct stat st_buf; 307 bool domkdir; 308 309 rc = stat(dest, &st_buf); 310 if (rc == 0) /* destination exists */ 311 return 0; 312 313 /* 314 * Try to create the destination. 315 * Either make a directory or touch a file depending on the source type. 316 * 317 * If the source isn't an absolute path, assume it is a filesystem type 318 * such as "tmpfs" and create a directory to mount it on. The dest will 319 * be something like "none" or "proc" which we shouldn't be checking. 320 */ 321 if (source[0] == '/') { 322 /* The source is an absolute path -- it better exist! */ 323 rc = stat(source, &st_buf); 324 if (rc) { 325 rc = errno; 326 pwarn("stat(%s) failed", source); 327 return -rc; 328 } 329 330 /* 331 * If bind mounting, we only create a directory if the source 332 * is a directory, else we always bind mount it as a file to 333 * support device nodes, sockets, etc... 334 * 335 * For all other mounts, we assume a block/char source is 336 * going to want a directory to mount to. If the source is 337 * something else (e.g. a fifo or socket), this probably will 338 * not do the right thing, but we'll fail later on when we try 339 * to mount(), so shouldn't be a big deal. 340 */ 341 domkdir = S_ISDIR(st_buf.st_mode) || 342 (!bind && (S_ISBLK(st_buf.st_mode) || 343 S_ISCHR(st_buf.st_mode))); 344 345 /* If bind mounting, also grab the mount flags of the source. */ 346 if (bind && mnt_flags) { 347 struct statvfs stvfs_buf; 348 rc = statvfs(source, &stvfs_buf); 349 if (rc) { 350 rc = errno; 351 pwarn( 352 "failed to look up mount flags: source=%s", 353 source); 354 return -rc; 355 } 356 *mnt_flags = stvfs_buf.f_flag; 357 } 358 } else { 359 /* The source is a relative path -- assume it's a pseudo fs. */ 360 361 /* Disallow relative bind mounts. */ 362 if (bind) { 363 warn("relative bind-mounts are not allowed: source=%s", 364 source); 365 return -EINVAL; 366 } 367 368 domkdir = true; 369 } 370 371 /* 372 * Now that we know what we want to do, do it! 373 * We always create the intermediate dirs and the final path with 0755 374 * perms and root/root ownership. This shouldn't be a problem because 375 * the actual mount will set those perms/ownership on the mount point 376 * which is all people should need to access it. 377 */ 378 rc = mkdir_p(dest, 0755, domkdir); 379 if (rc) 380 return rc; 381 if (!domkdir) { 382 int fd = open(dest, O_RDWR | O_CREAT | O_CLOEXEC, 0700); 383 if (fd < 0) { 384 rc = errno; 385 pwarn("open(%s) failed", dest); 386 return -rc; 387 } 388 close(fd); 389 } 390 if (chown(dest, uid, gid)) { 391 rc = errno; 392 pwarn("chown(%s, %u, %u) failed", dest, uid, gid); 393 return -rc; 394 } 395 return 0; 396 } 397 398 /* 399 * lookup_user: Gets the uid/gid for the given username. 400 */ 401 int lookup_user(const char *user, uid_t *uid, gid_t *gid) 402 { 403 char *buf = NULL; 404 struct passwd pw; 405 struct passwd *ppw = NULL; 406 ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX); 407 if (sz == -1) 408 sz = 65536; /* your guess is as good as mine... */ 409 410 /* 411 * sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return 412 * the maximum needed size of the buffer, so we don't have to search. 413 */ 414 buf = malloc(sz); 415 if (!buf) 416 return -ENOMEM; 417 getpwnam_r(user, &pw, buf, sz, &ppw); 418 /* 419 * We're safe to free the buffer here. The strings inside |pw| point 420 * inside |buf|, but we don't use any of them; this leaves the pointers 421 * dangling but it's safe. |ppw| points at |pw| if getpwnam_r(3) 422 * succeeded. 423 */ 424 free(buf); 425 /* getpwnam_r(3) does *not* set errno when |ppw| is NULL. */ 426 if (!ppw) 427 return -1; 428 429 *uid = ppw->pw_uid; 430 *gid = ppw->pw_gid; 431 return 0; 432 } 433 434 /* 435 * lookup_group: Gets the gid for the given group name. 436 */ 437 int lookup_group(const char *group, gid_t *gid) 438 { 439 char *buf = NULL; 440 struct group gr; 441 struct group *pgr = NULL; 442 ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX); 443 if (sz == -1) 444 sz = 65536; /* and mine is as good as yours, really */ 445 446 /* 447 * sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return 448 * the maximum needed size of the buffer, so we don't have to search. 449 */ 450 buf = malloc(sz); 451 if (!buf) 452 return -ENOMEM; 453 getgrnam_r(group, &gr, buf, sz, &pgr); 454 /* 455 * We're safe to free the buffer here. The strings inside gr point 456 * inside buf, but we don't use any of them; this leaves the pointers 457 * dangling but it's safe. pgr points at gr if getgrnam_r succeeded. 458 */ 459 free(buf); 460 /* getgrnam_r(3) does *not* set errno when |pgr| is NULL. */ 461 if (!pgr) 462 return -1; 463 464 *gid = pgr->gr_gid; 465 return 0; 466 } 467