Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
      6 
      7 #include <errno.h>
      8 #include <stdint.h>
      9 #include <sys/prctl.h>
     10 #include <sys/types.h>
     11 #include <unistd.h>
     12 
     13 #include "base/compiler_specific.h"
     14 #include "base/files/scoped_file.h"
     15 #include "base/logging.h"
     16 #include "base/macros.h"
     17 #include "base/memory/scoped_ptr.h"
     18 #include "base/posix/eintr_wrapper.h"
     19 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
     20 #include "sandbox/linux/bpf_dsl/codegen.h"
     21 #include "sandbox/linux/bpf_dsl/policy.h"
     22 #include "sandbox/linux/bpf_dsl/policy_compiler.h"
     23 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
     24 #include "sandbox/linux/bpf_dsl/syscall_set.h"
     25 #include "sandbox/linux/seccomp-bpf/die.h"
     26 #include "sandbox/linux/seccomp-bpf/syscall.h"
     27 #include "sandbox/linux/seccomp-bpf/trap.h"
     28 #include "sandbox/linux/services/proc_util.h"
     29 #include "sandbox/linux/services/syscall_wrappers.h"
     30 #include "sandbox/linux/services/thread_helpers.h"
     31 #include "sandbox/linux/system_headers/linux_filter.h"
     32 #include "sandbox/linux/system_headers/linux_seccomp.h"
     33 #include "sandbox/linux/system_headers/linux_syscalls.h"
     34 #include "third_party/valgrind/valgrind.h"
     35 
     36 namespace sandbox {
     37 
     38 namespace {
     39 
     40 bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
     41 
     42 bool IsSingleThreaded(int proc_fd) {
     43   return ThreadHelpers::IsSingleThreaded(proc_fd);
     44 }
     45 
     46 // Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via
     47 // prctl().
     48 bool KernelSupportsSeccompBPF() {
     49   errno = 0;
     50   const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
     51 
     52   if (rv == -1 && EFAULT == errno) {
     53     return true;
     54   }
     55   return false;
     56 }
     57 
     58 // LG introduced a buggy syscall, sys_set_media_ext, with the same number as
     59 // seccomp. Return true if the current kernel has this buggy syscall.
     60 //
     61 // We want this to work with upcoming versions of seccomp, so we pass bogus
     62 // flags that are unlikely to ever be used by the kernel. A normal kernel would
     63 // return -EINVAL, but a buggy LG kernel would return 1.
     64 bool KernelHasLGBug() {
     65 #if defined(OS_ANDROID)
     66   // sys_set_media will see this as NULL, which should be a safe (non-crashing)
     67   // way to invoke it. A genuine seccomp syscall will see it as
     68   // SECCOMP_SET_MODE_STRICT.
     69   const unsigned int operation = 0;
     70   // Chosen by fair dice roll. Guaranteed to be random.
     71   const unsigned int flags = 0xf7a46a5c;
     72   const int rv = sys_seccomp(operation, flags, nullptr);
     73   // A genuine kernel would return -EINVAL (which would set rv to -1 and errno
     74   // to EINVAL), or at the very least return some kind of error (which would
     75   // set rv to -1). Any other behavior indicates that whatever code received
     76   // our syscall was not the real seccomp.
     77   if (rv != -1) {
     78     return true;
     79   }
     80 #endif  // defined(OS_ANDROID)
     81 
     82   return false;
     83 }
     84 
     85 // Check if the kernel supports seccomp-filter via the seccomp system call
     86 // and the TSYNC feature to enable seccomp on all threads.
     87 bool KernelSupportsSeccompTsync() {
     88   if (KernelHasLGBug()) {
     89     return false;
     90   }
     91 
     92   errno = 0;
     93   const int rv =
     94       sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr);
     95 
     96   if (rv == -1 && errno == EFAULT) {
     97     return true;
     98   } else {
     99     // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
    100     CHECK_EQ(-1, rv);
    101     CHECK(ENOSYS == errno || EINVAL == errno);
    102     return false;
    103   }
    104 }
    105 
    106 uint64_t EscapePC() {
    107   intptr_t rv = Syscall::Call(-1);
    108   if (rv == -1 && errno == ENOSYS) {
    109     return 0;
    110   }
    111   return static_cast<uint64_t>(static_cast<uintptr_t>(rv));
    112 }
    113 
    114 intptr_t SandboxPanicTrap(const struct arch_seccomp_data&, void* aux) {
    115   SANDBOX_DIE(static_cast<const char*>(aux));
    116 }
    117 
    118 bpf_dsl::ResultExpr SandboxPanic(const char* error) {
    119   return bpf_dsl::Trap(SandboxPanicTrap, error);
    120 }
    121 
    122 }  // namespace
    123 
    124 SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy)
    125     : proc_fd_(), sandbox_has_started_(false), policy_(policy) {
    126 }
    127 
    128 SandboxBPF::~SandboxBPF() {
    129 }
    130 
    131 // static
    132 bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) {
    133   // Never pretend to support seccomp with Valgrind, as it
    134   // throws the tool off.
    135   if (IsRunningOnValgrind()) {
    136     return false;
    137   }
    138 
    139   switch (level) {
    140     case SeccompLevel::SINGLE_THREADED:
    141       return KernelSupportsSeccompBPF();
    142     case SeccompLevel::MULTI_THREADED:
    143       return KernelSupportsSeccompTsync();
    144   }
    145   NOTREACHED();
    146   return false;
    147 }
    148 
    149 bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) {
    150   DCHECK(policy_);
    151   CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED ||
    152         seccomp_level == SeccompLevel::MULTI_THREADED);
    153 
    154   if (sandbox_has_started_) {
    155     SANDBOX_DIE(
    156         "Cannot repeatedly start sandbox. Create a separate Sandbox "
    157         "object instead.");
    158     return false;
    159   }
    160 
    161   if (!proc_fd_.is_valid()) {
    162     SetProcFd(ProcUtil::OpenProc());
    163   }
    164 
    165   const bool supports_tsync = KernelSupportsSeccompTsync();
    166 
    167   if (seccomp_level == SeccompLevel::SINGLE_THREADED) {
    168     // Wait for /proc/self/task/ to update if needed and assert the
    169     // process is single threaded.
    170     ThreadHelpers::AssertSingleThreaded(proc_fd_.get());
    171   } else if (seccomp_level == SeccompLevel::MULTI_THREADED) {
    172     if (IsSingleThreaded(proc_fd_.get())) {
    173       SANDBOX_DIE("Cannot start sandbox; "
    174                   "process may be single-threaded when reported as not");
    175       return false;
    176     }
    177     if (!supports_tsync) {
    178       SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
    179                   "filters for a threadgroup");
    180       return false;
    181     }
    182   }
    183 
    184   // We no longer need access to any files in /proc. We want to do this
    185   // before installing the filters, just in case that our policy denies
    186   // close().
    187   if (proc_fd_.is_valid()) {
    188     proc_fd_.reset();
    189   }
    190 
    191   // Install the filters.
    192   InstallFilter(supports_tsync ||
    193                 seccomp_level == SeccompLevel::MULTI_THREADED);
    194 
    195   return true;
    196 }
    197 
    198 void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) {
    199   proc_fd_.swap(proc_fd);
    200 }
    201 
    202 // static
    203 bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
    204   return SyscallSet::IsValid(sysnum);
    205 }
    206 
    207 // static
    208 bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
    209   return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
    210 }
    211 
    212 // static
    213 intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
    214   return Syscall::Call(
    215       args.nr, static_cast<intptr_t>(args.args[0]),
    216       static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]),
    217       static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]),
    218       static_cast<intptr_t>(args.args[5]));
    219 }
    220 
    221 CodeGen::Program SandboxBPF::AssembleFilter() {
    222   DCHECK(policy_);
    223 
    224   bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
    225   if (Trap::SandboxDebuggingAllowedByUser()) {
    226     compiler.DangerousSetEscapePC(EscapePC());
    227   }
    228   compiler.SetPanicFunc(SandboxPanic);
    229   return compiler.Compile();
    230 }
    231 
    232 void SandboxBPF::InstallFilter(bool must_sync_threads) {
    233   // We want to be very careful in not imposing any requirements on the
    234   // policies that are set with SetSandboxPolicy(). This means, as soon as
    235   // the sandbox is active, we shouldn't be relying on libraries that could
    236   // be making system calls. This, for example, means we should avoid
    237   // using the heap and we should avoid using STL functions.
    238   // Temporarily copy the contents of the "program" vector into a
    239   // stack-allocated array; and then explicitly destroy that object.
    240   // This makes sure we don't ex- or implicitly call new/delete after we
    241   // installed the BPF filter program in the kernel. Depending on the
    242   // system memory allocator that is in effect, these operators can result
    243   // in system calls to things like munmap() or brk().
    244   CodeGen::Program program = AssembleFilter();
    245 
    246   struct sock_filter bpf[program.size()];
    247   const struct sock_fprog prog = {static_cast<unsigned short>(program.size()),
    248                                   bpf};
    249   memcpy(bpf, &program[0], sizeof(bpf));
    250   CodeGen::Program().swap(program);  // vector swap trick
    251 
    252   // Make an attempt to release memory that is no longer needed here, rather
    253   // than in the destructor. Try to avoid as much as possible to presume of
    254   // what will be possible to do in the new (sandboxed) execution environment.
    255   policy_.reset();
    256 
    257   if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
    258     SANDBOX_DIE("Kernel refuses to enable no-new-privs");
    259   }
    260 
    261   // Install BPF filter program. If the thread state indicates multi-threading
    262   // support, then the kernel hass the seccomp system call. Otherwise, fall
    263   // back on prctl, which requires the process to be single-threaded.
    264   if (must_sync_threads) {
    265     int rv =
    266         sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog);
    267     if (rv) {
    268       SANDBOX_DIE(
    269           "Kernel refuses to turn on and synchronize threads for BPF filters");
    270     }
    271   } else {
    272     if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
    273       SANDBOX_DIE("Kernel refuses to turn on BPF filters");
    274     }
    275   }
    276 
    277   sandbox_has_started_ = true;
    278 }
    279 
    280 }  // namespace sandbox
    281