1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/unistd.h> 8 #include <netinet/in.h> 9 #include <netinet/tcp.h> 10 #include <netinet/udp.h> 11 #include <pthread.h> 12 #include <signal.h> 13 #include <stdarg.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 #include <sys/ioctl.h> 18 #include <sys/ipc.h> 19 #include <sys/mman.h> 20 #include <sys/prctl.h> 21 #include <sys/resource.h> 22 #include <sys/shm.h> 23 #include <sys/socket.h> 24 #include <sys/time.h> 25 #include <sys/types.h> 26 #include <time.h> 27 #include <unistd.h> 28 29 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 30 31 using playground2::arch_seccomp_data; 32 using playground2::ErrorCode; 33 using playground2::Sandbox; 34 35 #define ERR EPERM 36 37 // We don't expect our sandbox to do anything useful yet. So, we will fail 38 // almost immediately. For now, force the code to continue running. The 39 // following line should be removed as soon as the sandbox is starting to 40 // actually enforce restrictions in a meaningful way: 41 #define _exit(x) do { } while (0) 42 43 namespace { 44 45 bool SendFds(int transport, const void *buf, size_t len, ...) { 46 int count = 0; 47 va_list ap; 48 va_start(ap, len); 49 while (va_arg(ap, int) >= 0) { 50 ++count; 51 } 52 va_end(ap); 53 if (!count) { 54 return false; 55 } 56 char cmsg_buf[CMSG_SPACE(count*sizeof(int))]; 57 memset(cmsg_buf, 0, sizeof(cmsg_buf)); 58 struct iovec iov[2] = { { 0 } }; 59 struct msghdr msg = { 0 }; 60 int dummy = 0; 61 iov[0].iov_base = &dummy; 62 iov[0].iov_len = sizeof(dummy); 63 if (buf && len > 0) { 64 iov[1].iov_base = const_cast<void *>(buf); 65 iov[1].iov_len = len; 66 } 67 msg.msg_iov = iov; 68 msg.msg_iovlen = (buf && len > 0) ? 2 : 1; 69 msg.msg_control = cmsg_buf; 70 msg.msg_controllen = CMSG_LEN(count*sizeof(int)); 71 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 72 cmsg->cmsg_level = SOL_SOCKET; 73 cmsg->cmsg_type = SCM_RIGHTS; 74 cmsg->cmsg_len = CMSG_LEN(count*sizeof(int)); 75 va_start(ap, len); 76 for (int i = 0, fd; (fd = va_arg(ap, int)) >= 0; ++i) { 77 (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i] = fd; 78 } 79 return sendmsg(transport, &msg, 0) == 80 static_cast<ssize_t>(sizeof(dummy) + ((buf && len > 0) ? len : 0)); 81 } 82 83 bool GetFds(int transport, void *buf, size_t *len, ...) { 84 int count = 0; 85 va_list ap; 86 va_start(ap, len); 87 for (int *fd; (fd = va_arg(ap, int *)) != NULL; ++count) { 88 *fd = -1; 89 } 90 va_end(ap); 91 if (!count) { 92 return false; 93 } 94 char cmsg_buf[CMSG_SPACE(count*sizeof(int))]; 95 memset(cmsg_buf, 0, sizeof(cmsg_buf)); 96 struct iovec iov[2] = { { 0 } }; 97 struct msghdr msg = { 0 }; 98 int err; 99 iov[0].iov_base = &err; 100 iov[0].iov_len = sizeof(int); 101 if (buf && len && *len > 0) { 102 iov[1].iov_base = buf; 103 iov[1].iov_len = *len; 104 } 105 msg.msg_iov = iov; 106 msg.msg_iovlen = (buf && len && *len > 0) ? 2 : 1; 107 msg.msg_control = cmsg_buf; 108 msg.msg_controllen = CMSG_LEN(count*sizeof(int)); 109 ssize_t bytes = recvmsg(transport, &msg, 0); 110 if (len) { 111 *len = bytes > static_cast<int>(sizeof(int)) ? bytes - sizeof(int) : 0; 112 } 113 if (bytes != static_cast<ssize_t>(sizeof(int) + iov[1].iov_len)) { 114 if (bytes >= 0) { 115 errno = 0; 116 } 117 return false; 118 } 119 if (err) { 120 // "err" is the first four bytes of the payload. If these are non-zero, 121 // the sender on the other side of the socketpair sent us an errno value. 122 // We don't expect to get any file handles in this case. 123 errno = err; 124 return false; 125 } 126 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 127 if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) || 128 !cmsg || 129 cmsg->cmsg_level != SOL_SOCKET || 130 cmsg->cmsg_type != SCM_RIGHTS || 131 cmsg->cmsg_len != CMSG_LEN(count*sizeof(int))) { 132 errno = EBADF; 133 return false; 134 } 135 va_start(ap, len); 136 for (int *fd, i = 0; (fd = va_arg(ap, int *)) != NULL; ++i) { 137 *fd = (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i]; 138 } 139 va_end(ap); 140 return true; 141 } 142 143 144 // POSIX doesn't define any async-signal safe function for converting 145 // an integer to ASCII. We'll have to define our own version. 146 // itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the 147 // conversion was successful or NULL otherwise. It never writes more than "sz" 148 // bytes. Output will be truncated as needed, and a NUL character is always 149 // appended. 150 char *itoa_r(int i, char *buf, size_t sz) { 151 // Make sure we can write at least one NUL byte. 152 size_t n = 1; 153 if (n > sz) { 154 return NULL; 155 } 156 157 // Handle negative numbers. 158 char *start = buf; 159 int minint = 0; 160 if (i < 0) { 161 // Make sure we can write the '-' character. 162 if (++n > sz) { 163 *start = '\000'; 164 return NULL; 165 } 166 *start++ = '-'; 167 168 // Turn our number positive. 169 if (i == -i) { 170 // The lowest-most negative integer needs special treatment. 171 minint = 1; 172 i = -(i + 1); 173 } else { 174 // "Normal" negative numbers are easy. 175 i = -i; 176 } 177 } 178 179 // Loop until we have converted the entire number. Output at least one 180 // character (i.e. '0'). 181 char *ptr = start; 182 do { 183 // Make sure there is still enough space left in our output buffer. 184 if (++n > sz) { 185 buf = NULL; 186 goto truncate; 187 } 188 189 // Output the next digit and (if necessary) compensate for the lowest-most 190 // negative integer needing special treatment. This works because, no 191 // matter the bit width of the integer, the lowest-most integer always ends 192 // in 2, 4, 6, or 8. 193 *ptr++ = i%10 + '0' + minint; 194 minint = 0; 195 i /= 10; 196 } while (i); 197 truncate: // Terminate the output with a NUL character. 198 *ptr = '\000'; 199 200 // Conversion to ASCII actually resulted in the digits being in reverse 201 // order. We can't easily generate them in forward order, as we can't tell 202 // the number of characters needed until we are done converting. 203 // So, now, we reverse the string (except for the possible "-" sign). 204 while (--ptr > start) { 205 char ch = *ptr; 206 *ptr = *start; 207 *start++ = ch; 208 } 209 return buf; 210 } 211 212 // This handler gets called, whenever we encounter a system call that we 213 // don't recognize explicitly. For the purposes of this program, we just 214 // log the system call and then deny it. More elaborate sandbox policies 215 // might try to evaluate the system call in user-space, instead. 216 // The only notable complication is that this function must be async-signal 217 // safe. This restricts the libary functions that we can call. 218 intptr_t DefaultHandler(const struct arch_seccomp_data& data, void *) { 219 static const char msg0[] = "Disallowed system call #"; 220 static const char msg1[] = "\n"; 221 char buf[sizeof(msg0) - 1 + 25 + sizeof(msg1)]; 222 223 *buf = '\000'; 224 strncat(buf, msg0, sizeof(buf)); 225 226 char *ptr = strrchr(buf, '\000'); 227 itoa_r(data.nr, ptr, sizeof(buf) - (ptr - buf)); 228 229 ptr = strrchr(ptr, '\000'); 230 strncat(ptr, msg1, sizeof(buf) - (ptr - buf)); 231 232 ptr = strrchr(ptr, '\000'); 233 if (HANDLE_EINTR(write(2, buf, ptr - buf))) { } 234 235 return -ERR; 236 } 237 238 ErrorCode Evaluator(Sandbox *sandbox, int sysno, void *) { 239 switch (sysno) { 240 #if defined(__NR_accept) 241 case __NR_accept: case __NR_accept4: 242 #endif 243 case __NR_alarm: 244 case __NR_brk: 245 case __NR_clock_gettime: 246 case __NR_close: 247 case __NR_dup: case __NR_dup2: 248 case __NR_epoll_create: case __NR_epoll_ctl: case __NR_epoll_wait: 249 case __NR_exit: case __NR_exit_group: 250 case __NR_fcntl: 251 #if defined(__NR_fcntl64) 252 case __NR_fcntl64: 253 #endif 254 case __NR_fdatasync: 255 case __NR_fstat: 256 #if defined(__NR_fstat64) 257 case __NR_fstat64: 258 #endif 259 case __NR_ftruncate: 260 case __NR_futex: 261 case __NR_getdents: case __NR_getdents64: 262 case __NR_getegid: 263 #if defined(__NR_getegid32) 264 case __NR_getegid32: 265 #endif 266 case __NR_geteuid: 267 #if defined(__NR_geteuid32) 268 case __NR_geteuid32: 269 #endif 270 case __NR_getgid: 271 #if defined(__NR_getgid32) 272 case __NR_getgid32: 273 #endif 274 case __NR_getitimer: case __NR_setitimer: 275 #if defined(__NR_getpeername) 276 case __NR_getpeername: 277 #endif 278 case __NR_getpid: case __NR_gettid: 279 #if defined(__NR_getsockname) 280 case __NR_getsockname: 281 #endif 282 case __NR_gettimeofday: 283 case __NR_getuid: 284 #if defined(__NR_getuid32) 285 case __NR_getuid32: 286 #endif 287 #if defined(__NR__llseek) 288 case __NR__llseek: 289 #endif 290 case __NR_lseek: 291 case __NR_nanosleep: 292 case __NR_pipe: case __NR_pipe2: 293 case __NR_poll: 294 case __NR_pread64: case __NR_preadv: 295 case __NR_pwrite64: case __NR_pwritev: 296 case __NR_read: case __NR_readv: 297 case __NR_restart_syscall: 298 case __NR_set_robust_list: 299 case __NR_rt_sigaction: 300 #if defined(__NR_sigaction) 301 case __NR_sigaction: 302 #endif 303 #if defined(__NR_signal) 304 case __NR_signal: 305 #endif 306 case __NR_rt_sigprocmask: 307 #if defined(__NR_sigprocmask) 308 case __NR_sigprocmask: 309 #endif 310 #if defined(__NR_shutdown) 311 case __NR_shutdown: 312 #endif 313 case __NR_rt_sigreturn: 314 #if defined(__NR_sigreturn) 315 case __NR_sigreturn: 316 #endif 317 #if defined(__NR_socketpair) 318 case __NR_socketpair: 319 #endif 320 case __NR_time: 321 case __NR_uname: 322 case __NR_write: case __NR_writev: 323 return ErrorCode(ErrorCode::ERR_ALLOWED); 324 325 case __NR_prctl: 326 // Allow PR_SET_DUMPABLE and PR_GET_DUMPABLE. Do not allow anything else. 327 return sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL, 328 PR_SET_DUMPABLE, 329 ErrorCode(ErrorCode::ERR_ALLOWED), 330 sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL, 331 PR_GET_DUMPABLE, 332 ErrorCode(ErrorCode::ERR_ALLOWED), 333 sandbox->Trap(DefaultHandler, NULL))); 334 335 // The following system calls are temporarily permitted. This must be 336 // tightened later. But we currently don't implement enough of the sandboxing 337 // API to do so. 338 // As is, this sandbox isn't exactly safe :-/ 339 #if defined(__NR_sendmsg) 340 case __NR_sendmsg: case __NR_sendto: 341 case __NR_recvmsg: case __NR_recvfrom: 342 case __NR_getsockopt: case __NR_setsockopt: 343 #elif defined(__NR_socketcall) 344 case __NR_socketcall: 345 #endif 346 #if defined(__NR_shmat) 347 case __NR_shmat: case __NR_shmctl: case __NR_shmdt: case __NR_shmget: 348 #elif defined(__NR_ipc) 349 case __NR_ipc: 350 #endif 351 #if defined(__NR_mmap2) 352 case __NR_mmap2: 353 #else 354 case __NR_mmap: 355 #endif 356 #if defined(__NR_ugetrlimit) 357 case __NR_ugetrlimit: 358 #endif 359 case __NR_getrlimit: 360 case __NR_ioctl: 361 case __NR_clone: 362 case __NR_munmap: case __NR_mprotect: case __NR_madvise: 363 case __NR_remap_file_pages: 364 return ErrorCode(ErrorCode::ERR_ALLOWED); 365 366 // Everything that isn't explicitly allowed is denied. 367 default: 368 return sandbox->Trap(DefaultHandler, NULL); 369 } 370 } 371 372 void *ThreadFnc(void *arg) { 373 return arg; 374 } 375 376 void *SendmsgStressThreadFnc(void *arg) { 377 if (arg) { } 378 static const int repetitions = 100; 379 static const int kNumFds = 3; 380 for (int rep = 0; rep < repetitions; ++rep) { 381 int fds[2 + kNumFds]; 382 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) { 383 perror("socketpair()"); 384 _exit(1); 385 } 386 size_t len = 4; 387 char buf[4]; 388 if (!SendFds(fds[0], "test", 4, fds[1], fds[1], fds[1], -1) || 389 !GetFds(fds[1], buf, &len, fds+2, fds+3, fds+4, NULL) || 390 len != 4 || 391 memcmp(buf, "test", len) || 392 write(fds[2], "demo", 4) != 4 || 393 read(fds[0], buf, 4) != 4 || 394 memcmp(buf, "demo", 4)) { 395 perror("sending/receiving of fds"); 396 _exit(1); 397 } 398 for (int i = 0; i < 2+kNumFds; ++i) { 399 if (close(fds[i])) { 400 perror("close"); 401 _exit(1); 402 } 403 } 404 } 405 return NULL; 406 } 407 408 } // namespace 409 410 int main(int argc, char *argv[]) { 411 if (argc) { } 412 if (argv) { } 413 int proc_fd = open("/proc", O_RDONLY|O_DIRECTORY); 414 if (Sandbox::SupportsSeccompSandbox(proc_fd) != 415 Sandbox::STATUS_AVAILABLE) { 416 perror("sandbox"); 417 _exit(1); 418 } 419 Sandbox sandbox; 420 sandbox.set_proc_fd(proc_fd); 421 sandbox.SetSandboxPolicy(Evaluator, NULL); 422 sandbox.StartSandbox(); 423 424 // Check that we can create threads 425 pthread_t thr; 426 if (!pthread_create(&thr, NULL, ThreadFnc, 427 reinterpret_cast<void *>(0x1234))) { 428 void *ret; 429 pthread_join(thr, &ret); 430 if (ret != reinterpret_cast<void *>(0x1234)) { 431 perror("clone() failed"); 432 _exit(1); 433 } 434 } else { 435 perror("clone() failed"); 436 _exit(1); 437 } 438 439 // Check that we handle restart_syscall() without dieing. This is a little 440 // tricky to trigger. And I can't think of a good way to verify whether it 441 // actually executed. 442 signal(SIGALRM, SIG_IGN); 443 const struct itimerval tv = { { 0, 0 }, { 0, 5*1000 } }; 444 const struct timespec tmo = { 0, 100*1000*1000 }; 445 setitimer(ITIMER_REAL, &tv, NULL); 446 nanosleep(&tmo, NULL); 447 448 // Check that we can query the size of the stack, but that all other 449 // calls to getrlimit() fail. 450 if (((errno = 0), !getrlimit(RLIMIT_STACK, NULL)) || errno != EFAULT || 451 ((errno = 0), !getrlimit(RLIMIT_CORE, NULL)) || errno != ERR) { 452 perror("getrlimit()"); 453 _exit(1); 454 } 455 456 // Check that we can query TCGETS and TIOCGWINSZ, but no other ioctls(). 457 if (((errno = 0), !ioctl(2, TCGETS, NULL)) || errno != EFAULT || 458 ((errno = 0), !ioctl(2, TIOCGWINSZ, NULL)) || errno != EFAULT || 459 ((errno = 0), !ioctl(2, TCSETS, NULL)) || errno != ERR) { 460 perror("ioctl()"); 461 _exit(1); 462 } 463 464 // Check that prctl() can manipulate the dumpable flag, but nothing else. 465 if (((errno = 0), !prctl(PR_GET_DUMPABLE)) || errno || 466 ((errno = 0), prctl(PR_SET_DUMPABLE, 1)) || errno || 467 ((errno = 0), !prctl(PR_SET_SECCOMP, 0)) || errno != ERR) { 468 perror("prctl()"); 469 _exit(1); 470 } 471 472 // Check that we can send and receive file handles. 473 int fds[3]; 474 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) { 475 perror("socketpair()"); 476 _exit(1); 477 } 478 size_t len = 4; 479 char buf[4]; 480 if (!SendFds(fds[0], "test", 4, fds[1], -1) || 481 !GetFds(fds[1], buf, &len, fds+2, NULL) || 482 len != 4 || 483 memcmp(buf, "test", len) || 484 write(fds[2], "demo", 4) != 4 || 485 read(fds[0], buf, 4) != 4 || 486 memcmp(buf, "demo", 4) || 487 close(fds[0]) || 488 close(fds[1]) || 489 close(fds[2])) { 490 perror("sending/receiving of fds"); 491 _exit(1); 492 } 493 494 // Check whether SysV IPC works. 495 int shmid; 496 void *addr; 497 if ((shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT|0600)) < 0 || 498 (addr = shmat(shmid, NULL, 0)) == reinterpret_cast<void *>(-1) || 499 shmdt(addr) || 500 shmctl(shmid, IPC_RMID, NULL)) { 501 perror("sysv IPC"); 502 _exit(1); 503 } 504 505 // Print a message so that the user can see the sandbox is activated. 506 time_t tm = time(NULL); 507 printf("Sandbox has been started at %s", ctime(&tm)); 508 509 // Stress-test the sendmsg() code 510 static const int kSendmsgStressNumThreads = 10; 511 pthread_t sendmsgStressThreads[kSendmsgStressNumThreads]; 512 for (int i = 0; i < kSendmsgStressNumThreads; ++i) { 513 if (pthread_create(sendmsgStressThreads + i, NULL, 514 SendmsgStressThreadFnc, NULL)) { 515 perror("pthread_create"); 516 _exit(1); 517 } 518 } 519 for (int i = 0; i < kSendmsgStressNumThreads; ++i) { 520 pthread_join(sendmsgStressThreads[i], NULL); 521 } 522 523 return 0; 524 } 525