Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <errno.h>
      6 #include <fcntl.h>
      7 #include <linux/unistd.h>
      8 #include <netinet/in.h>
      9 #include <netinet/tcp.h>
     10 #include <netinet/udp.h>
     11 #include <pthread.h>
     12 #include <signal.h>
     13 #include <stdarg.h>
     14 #include <stdio.h>
     15 #include <stdlib.h>
     16 #include <string.h>
     17 #include <sys/ioctl.h>
     18 #include <sys/ipc.h>
     19 #include <sys/mman.h>
     20 #include <sys/prctl.h>
     21 #include <sys/resource.h>
     22 #include <sys/shm.h>
     23 #include <sys/socket.h>
     24 #include <sys/time.h>
     25 #include <sys/types.h>
     26 #include <time.h>
     27 #include <unistd.h>
     28 
     29 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
     30 
     31 using playground2::arch_seccomp_data;
     32 using playground2::ErrorCode;
     33 using playground2::Sandbox;
     34 
     35 #define ERR EPERM
     36 
     37 // We don't expect our sandbox to do anything useful yet, so the checks in
     38 // this program will fail almost immediately. For now, turn _exit() into a
     39 // no-op so that the code keeps running after a failed check. The following
     40 // line should be removed as soon as the sandbox actually enforces restrictions:
     41 #define _exit(x) do { } while (0)
     42 
     43 namespace {
     44 
     45 bool SendFds(int transport, const void *buf, size_t len, ...) {
     46   int count = 0;
     47   va_list ap;
     48   va_start(ap, len);
     49   while (va_arg(ap, int) >= 0) {
     50     ++count;
     51   }
     52   va_end(ap);
     53   if (!count) {
     54     return false;
     55   }
     56   char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
     57   memset(cmsg_buf, 0, sizeof(cmsg_buf));
     58   struct iovec  iov[2] = { { 0 } };
     59   struct msghdr msg    = { 0 };
     60   int dummy            = 0;
     61   iov[0].iov_base      = &dummy;
     62   iov[0].iov_len       = sizeof(dummy);
     63   if (buf && len > 0) {
     64     iov[1].iov_base    = const_cast<void *>(buf);
     65     iov[1].iov_len     = len;
     66   }
     67   msg.msg_iov          = iov;
     68   msg.msg_iovlen       = (buf && len > 0) ? 2 : 1;
     69   msg.msg_control      = cmsg_buf;
     70   msg.msg_controllen   = CMSG_LEN(count*sizeof(int));
     71   struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
     72   cmsg->cmsg_level     = SOL_SOCKET;
     73   cmsg->cmsg_type      = SCM_RIGHTS;
     74   cmsg->cmsg_len       = CMSG_LEN(count*sizeof(int));
     75   va_start(ap, len);
     76   for (int i = 0, fd; (fd = va_arg(ap, int)) >= 0; ++i) {
     77     (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i] = fd;
     78   }
     79   return sendmsg(transport, &msg, 0) ==
     80       static_cast<ssize_t>(sizeof(dummy) + ((buf && len > 0) ? len : 0));
     81 }
     82 
     83 bool GetFds(int transport, void *buf, size_t *len, ...) {
     84   int count = 0;
     85   va_list ap;
     86   va_start(ap, len);
     87   for (int *fd; (fd = va_arg(ap, int *)) != NULL; ++count) {
     88     *fd = -1;
     89   }
     90   va_end(ap);
     91   if (!count) {
     92     return false;
     93   }
     94   char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
     95   memset(cmsg_buf, 0, sizeof(cmsg_buf));
     96   struct iovec iov[2] = { { 0 } };
     97   struct msghdr msg   = { 0 };
     98   int err;
     99   iov[0].iov_base     = &err;
    100   iov[0].iov_len      = sizeof(int);
    101   if (buf && len && *len > 0) {
    102     iov[1].iov_base   = buf;
    103     iov[1].iov_len    = *len;
    104   }
    105   msg.msg_iov         = iov;
    106   msg.msg_iovlen      = (buf && len && *len > 0) ? 2 : 1;
    107   msg.msg_control     = cmsg_buf;
    108   msg.msg_controllen  = CMSG_LEN(count*sizeof(int));
    109   ssize_t bytes = recvmsg(transport, &msg, 0);
    110   if (len) {
    111     *len = bytes > static_cast<int>(sizeof(int)) ? bytes - sizeof(int) : 0;
    112   }
    113   if (bytes != static_cast<ssize_t>(sizeof(int) + iov[1].iov_len)) {
    114     if (bytes >= 0) {
    115       errno = 0;
    116     }
    117     return false;
    118   }
    119   if (err) {
    120     // "err" is the first four bytes of the payload. If these are non-zero,
    121     // the sender on the other side of the socketpair sent us an errno value.
    122     // We don't expect to get any file handles in this case.
    123     errno = err;
    124     return false;
    125   }
    126   struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    127   if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||
    128       !cmsg                                    ||
    129       cmsg->cmsg_level != SOL_SOCKET           ||
    130       cmsg->cmsg_type  != SCM_RIGHTS           ||
    131       cmsg->cmsg_len   != CMSG_LEN(count*sizeof(int))) {
    132     errno = EBADF;
    133     return false;
    134   }
    135   va_start(ap, len);
    136   for (int *fd, i = 0; (fd = va_arg(ap, int *)) != NULL; ++i) {
    137     *fd = (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i];
    138   }
    139   va_end(ap);
    140   return true;
    141 }
    142 
    143 
    144 // POSIX doesn't define any async-signal safe function for converting
    145 // an integer to ASCII. We'll have to define our own version.
    146 // itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the
    147 // conversion was successful or NULL otherwise. It never writes more than "sz"
    148 // bytes. Output will be truncated as needed, and a NUL character is always
    149 // appended.
    150 char *itoa_r(int i, char *buf, size_t sz) {
    151   // Make sure we can write at least one NUL byte.
    152   size_t n = 1;
    153   if (n > sz) {
    154     return NULL;
    155   }
    156 
    157   // Handle negative numbers.
    158   char *start = buf;
    159   int minint = 0;
    160   if (i < 0) {
    161     // Make sure we can write the '-' character.
    162     if (++n > sz) {
    163       *start = '\000';
    164       return NULL;
    165     }
    166     *start++ = '-';
    167 
    168     // Turn our number positive.
    169     if (i == -i) {
    170       // The lowest-most negative integer needs special treatment.
    171       minint = 1;
    172       i = -(i + 1);
    173     } else {
    174       // "Normal" negative numbers are easy.
    175       i = -i;
    176     }
    177   }
    178 
    179   // Loop until we have converted the entire number. Output at least one
    180   // character (i.e. '0').
    181   char *ptr = start;
    182   do {
    183     // Make sure there is still enough space left in our output buffer.
    184     if (++n > sz) {
    185       buf = NULL;
    186       goto truncate;
    187     }
    188 
    189     // Output the next digit and (if necessary) compensate for the lowest-most
    190     // negative integer needing special treatment. This works because, no
    191     // matter the bit width of the integer, the lowest-most integer always ends
    192     // in 2, 4, 6, or 8.
    193     *ptr++ = i%10 + '0' + minint;
    194     minint = 0;
    195     i /= 10;
    196   } while (i);
    197  truncate:  // Terminate the output with a NUL character.
    198   *ptr = '\000';
    199 
    200   // Conversion to ASCII actually resulted in the digits being in reverse
    201   // order. We can't easily generate them in forward order, as we can't tell
    202   // the number of characters needed until we are done converting.
    203   // So, now, we reverse the string (except for the possible "-" sign).
    204   while (--ptr > start) {
    205     char ch = *ptr;
    206     *ptr = *start;
    207     *start++ = ch;
    208   }
    209   return buf;
    210 }
    211 
    212 // This handler gets called, whenever we encounter a system call that we
    213 // don't recognize explicitly. For the purposes of this program, we just
    214 // log the system call and then deny it. More elaborate sandbox policies
    215 // might try to evaluate the system call in user-space, instead.
    216 // The only notable complication is that this function must be async-signal
    217 // safe. This restricts the libary functions that we can call.
    218 intptr_t DefaultHandler(const struct arch_seccomp_data& data, void *) {
    219   static const char msg0[] = "Disallowed system call #";
    220   static const char msg1[] = "\n";
    221   char buf[sizeof(msg0) - 1 + 25 + sizeof(msg1)];
    222 
    223   *buf = '\000';
    224   strncat(buf, msg0, sizeof(buf));
    225 
    226   char *ptr = strrchr(buf, '\000');
    227   itoa_r(data.nr, ptr, sizeof(buf) - (ptr - buf));
    228 
    229   ptr = strrchr(ptr, '\000');
    230   strncat(ptr, msg1, sizeof(buf) - (ptr - buf));
    231 
    232   ptr = strrchr(ptr, '\000');
    233   if (HANDLE_EINTR(write(2, buf, ptr - buf))) { }
    234 
    235   return -ERR;
    236 }
    237 
    238 ErrorCode Evaluator(Sandbox *sandbox, int sysno, void *) {
    239   switch (sysno) {
    240 #if defined(__NR_accept)
    241   case __NR_accept: case __NR_accept4:
    242 #endif
    243   case __NR_alarm:
    244   case __NR_brk:
    245   case __NR_clock_gettime:
    246   case __NR_close:
    247   case __NR_dup: case __NR_dup2:
    248   case __NR_epoll_create: case __NR_epoll_ctl: case __NR_epoll_wait:
    249   case __NR_exit: case __NR_exit_group:
    250   case __NR_fcntl:
    251 #if defined(__NR_fcntl64)
    252   case __NR_fcntl64:
    253 #endif
    254   case __NR_fdatasync:
    255   case __NR_fstat:
    256 #if defined(__NR_fstat64)
    257   case __NR_fstat64:
    258 #endif
    259   case __NR_ftruncate:
    260   case __NR_futex:
    261   case __NR_getdents: case __NR_getdents64:
    262   case __NR_getegid:
    263 #if defined(__NR_getegid32)
    264   case __NR_getegid32:
    265 #endif
    266   case __NR_geteuid:
    267 #if defined(__NR_geteuid32)
    268   case __NR_geteuid32:
    269 #endif
    270   case __NR_getgid:
    271 #if defined(__NR_getgid32)
    272   case __NR_getgid32:
    273 #endif
    274   case __NR_getitimer: case __NR_setitimer:
    275 #if defined(__NR_getpeername)
    276   case __NR_getpeername:
    277 #endif
    278   case __NR_getpid: case __NR_gettid:
    279 #if defined(__NR_getsockname)
    280   case __NR_getsockname:
    281 #endif
    282   case __NR_gettimeofday:
    283   case __NR_getuid:
    284 #if defined(__NR_getuid32)
    285   case __NR_getuid32:
    286 #endif
    287 #if defined(__NR__llseek)
    288   case __NR__llseek:
    289 #endif
    290   case __NR_lseek:
    291   case __NR_nanosleep:
    292   case __NR_pipe: case __NR_pipe2:
    293   case __NR_poll:
    294   case __NR_pread64: case __NR_preadv:
    295   case __NR_pwrite64: case __NR_pwritev:
    296   case __NR_read: case __NR_readv:
    297   case __NR_restart_syscall:
    298   case __NR_set_robust_list:
    299   case __NR_rt_sigaction:
    300 #if defined(__NR_sigaction)
    301   case __NR_sigaction:
    302 #endif
    303 #if defined(__NR_signal)
    304   case __NR_signal:
    305 #endif
    306   case __NR_rt_sigprocmask:
    307 #if defined(__NR_sigprocmask)
    308   case __NR_sigprocmask:
    309 #endif
    310 #if defined(__NR_shutdown)
    311   case __NR_shutdown:
    312 #endif
    313   case __NR_rt_sigreturn:
    314 #if defined(__NR_sigreturn)
    315   case __NR_sigreturn:
    316 #endif
    317 #if defined(__NR_socketpair)
    318   case __NR_socketpair:
    319 #endif
    320   case __NR_time:
    321   case __NR_uname:
    322   case __NR_write: case __NR_writev:
    323     return ErrorCode(ErrorCode::ERR_ALLOWED);
    324 
    325   case __NR_prctl:
    326     // Allow PR_SET_DUMPABLE and PR_GET_DUMPABLE. Do not allow anything else.
    327     return sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL,
    328                          PR_SET_DUMPABLE,
    329                          ErrorCode(ErrorCode::ERR_ALLOWED),
    330            sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL,
    331                          PR_GET_DUMPABLE,
    332                          ErrorCode(ErrorCode::ERR_ALLOWED),
    333            sandbox->Trap(DefaultHandler, NULL)));
    334 
    335   // The following system calls are temporarily permitted. This must be
    336   // tightened later. But we currently don't implement enough of the sandboxing
    337   // API to do so.
    338   // As is, this sandbox isn't exactly safe :-/
    339 #if defined(__NR_sendmsg)
    340   case __NR_sendmsg: case __NR_sendto:
    341   case __NR_recvmsg: case __NR_recvfrom:
    342   case __NR_getsockopt: case __NR_setsockopt:
    343 #elif defined(__NR_socketcall)
    344   case __NR_socketcall:
    345 #endif
    346 #if defined(__NR_shmat)
    347   case __NR_shmat: case __NR_shmctl: case __NR_shmdt: case __NR_shmget:
    348 #elif defined(__NR_ipc)
    349   case __NR_ipc:
    350 #endif
    351 #if defined(__NR_mmap2)
    352   case __NR_mmap2:
    353 #else
    354   case __NR_mmap:
    355 #endif
    356 #if defined(__NR_ugetrlimit)
    357   case __NR_ugetrlimit:
    358 #endif
    359   case __NR_getrlimit:
    360   case __NR_ioctl:
    361   case __NR_clone:
    362   case __NR_munmap: case __NR_mprotect: case __NR_madvise:
    363   case __NR_remap_file_pages:
    364     return ErrorCode(ErrorCode::ERR_ALLOWED);
    365 
    366   // Everything that isn't explicitly allowed is denied.
    367   default:
    368     return sandbox->Trap(DefaultHandler, NULL);
    369   }
    370 }
    371 
    372 void *ThreadFnc(void *arg) {
    373   return arg;
    374 }
    375 
    376 void *SendmsgStressThreadFnc(void *arg) {
    377   if (arg) { }
    378   static const int repetitions = 100;
    379   static const int kNumFds = 3;
    380   for (int rep = 0; rep < repetitions; ++rep) {
    381     int fds[2 + kNumFds];
    382     if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
    383       perror("socketpair()");
    384       _exit(1);
    385     }
    386     size_t len = 4;
    387     char buf[4];
    388     if (!SendFds(fds[0], "test", 4, fds[1], fds[1], fds[1], -1) ||
    389         !GetFds(fds[1], buf, &len, fds+2, fds+3, fds+4, NULL) ||
    390         len != 4 ||
    391         memcmp(buf, "test", len) ||
    392         write(fds[2], "demo", 4) != 4 ||
    393         read(fds[0], buf, 4) != 4 ||
    394         memcmp(buf, "demo", 4)) {
    395       perror("sending/receiving of fds");
    396       _exit(1);
    397     }
    398     for (int i = 0; i < 2+kNumFds; ++i) {
    399       if (close(fds[i])) {
    400         perror("close");
    401         _exit(1);
    402       }
    403     }
    404   }
    405   return NULL;
    406 }
    407 
    408 }  // namespace
    409 
    410 int main(int argc, char *argv[]) {
    411   if (argc) { }
    412   if (argv) { }
    413   int proc_fd = open("/proc", O_RDONLY|O_DIRECTORY);
    414   if (Sandbox::SupportsSeccompSandbox(proc_fd) !=
    415       Sandbox::STATUS_AVAILABLE) {
    416     perror("sandbox");
    417     _exit(1);
    418   }
    419   Sandbox sandbox;
    420   sandbox.set_proc_fd(proc_fd);
    421   sandbox.SetSandboxPolicy(Evaluator, NULL);
    422   sandbox.StartSandbox();
    423 
    424   // Check that we can create threads
    425   pthread_t thr;
    426   if (!pthread_create(&thr, NULL, ThreadFnc,
    427                       reinterpret_cast<void *>(0x1234))) {
    428     void *ret;
    429     pthread_join(thr, &ret);
    430     if (ret != reinterpret_cast<void *>(0x1234)) {
    431       perror("clone() failed");
    432       _exit(1);
    433     }
    434   } else {
    435     perror("clone() failed");
    436     _exit(1);
    437   }
    438 
    439   // Check that we handle restart_syscall() without dieing. This is a little
    440   // tricky to trigger. And I can't think of a good way to verify whether it
    441   // actually executed.
    442   signal(SIGALRM, SIG_IGN);
    443   const struct itimerval tv = { { 0, 0 }, { 0, 5*1000 } };
    444   const struct timespec tmo = { 0, 100*1000*1000 };
    445   setitimer(ITIMER_REAL, &tv, NULL);
    446   nanosleep(&tmo, NULL);
    447 
    448   // Check that we can query the size of the stack, but that all other
    449   // calls to getrlimit() fail.
    450   if (((errno = 0), !getrlimit(RLIMIT_STACK, NULL)) || errno != EFAULT ||
    451       ((errno = 0), !getrlimit(RLIMIT_CORE,  NULL)) || errno != ERR) {
    452     perror("getrlimit()");
    453     _exit(1);
    454   }
    455 
    456   // Check that we can query TCGETS and TIOCGWINSZ, but no other ioctls().
    457   if (((errno = 0), !ioctl(2, TCGETS,     NULL)) || errno != EFAULT ||
    458       ((errno = 0), !ioctl(2, TIOCGWINSZ, NULL)) || errno != EFAULT ||
    459       ((errno = 0), !ioctl(2, TCSETS,     NULL)) || errno != ERR) {
    460     perror("ioctl()");
    461     _exit(1);
    462   }
    463 
    464   // Check that prctl() can manipulate the dumpable flag, but nothing else.
    465   if (((errno = 0), !prctl(PR_GET_DUMPABLE))    || errno ||
    466       ((errno = 0),  prctl(PR_SET_DUMPABLE, 1)) || errno ||
    467       ((errno = 0), !prctl(PR_SET_SECCOMP,  0)) || errno != ERR) {
    468     perror("prctl()");
    469     _exit(1);
    470   }
    471 
    472   // Check that we can send and receive file handles.
    473   int fds[3];
    474   if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
    475     perror("socketpair()");
    476     _exit(1);
    477   }
    478   size_t len = 4;
    479   char buf[4];
    480   if (!SendFds(fds[0], "test", 4, fds[1], -1) ||
    481       !GetFds(fds[1], buf, &len, fds+2, NULL) ||
    482       len != 4 ||
    483       memcmp(buf, "test", len) ||
    484       write(fds[2], "demo", 4) != 4 ||
    485       read(fds[0], buf, 4) != 4 ||
    486       memcmp(buf, "demo", 4) ||
    487       close(fds[0]) ||
    488       close(fds[1]) ||
    489       close(fds[2])) {
    490     perror("sending/receiving of fds");
    491     _exit(1);
    492   }
    493 
    494   // Check whether SysV IPC works.
    495   int shmid;
    496   void *addr;
    497   if ((shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT|0600)) < 0 ||
    498       (addr = shmat(shmid, NULL, 0)) == reinterpret_cast<void *>(-1) ||
    499       shmdt(addr) ||
    500       shmctl(shmid, IPC_RMID, NULL)) {
    501     perror("sysv IPC");
    502     _exit(1);
    503   }
    504 
    505   // Print a message so that the user can see the sandbox is activated.
    506   time_t tm = time(NULL);
    507   printf("Sandbox has been started at %s", ctime(&tm));
    508 
    509   // Stress-test the sendmsg() code
    510   static const int kSendmsgStressNumThreads = 10;
    511   pthread_t sendmsgStressThreads[kSendmsgStressNumThreads];
    512   for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
    513     if (pthread_create(sendmsgStressThreads + i, NULL,
    514                        SendmsgStressThreadFnc, NULL)) {
    515       perror("pthread_create");
    516       _exit(1);
    517     }
    518   }
    519   for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
    520     pthread_join(sendmsgStressThreads[i], NULL);
    521   }
    522 
    523   return 0;
    524 }
    525