Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "sandbox/linux/seccomp-bpf/trap.h"
      6 
      7 #include <errno.h>
      8 #include <signal.h>
      9 #include <stddef.h>
     10 #include <stdint.h>
     11 #include <string.h>
     12 #include <sys/syscall.h>
     13 
     14 #include <algorithm>
     15 #include <limits>
     16 #include <tuple>
     17 
     18 #include "base/compiler_specific.h"
     19 #include "base/logging.h"
     20 #include "build/build_config.h"
     21 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
     22 #include "sandbox/linux/seccomp-bpf/die.h"
     23 #include "sandbox/linux/seccomp-bpf/syscall.h"
     24 #include "sandbox/linux/services/syscall_wrappers.h"
     25 #include "sandbox/linux/system_headers/linux_seccomp.h"
     26 #include "sandbox/linux/system_headers/linux_signal.h"
     27 
     28 namespace {
     29 
     30 struct arch_sigsys {
     31   void* ip;
     32   int nr;
     33   unsigned int arch;
     34 };
     35 
     36 const int kCapacityIncrement = 20;
     37 
     38 // Unsafe traps can only be turned on, if the user explicitly allowed them
     39 // by setting the CHROME_SANDBOX_DEBUGGING environment variable.
     40 const char kSandboxDebuggingEnv[] = "CHROME_SANDBOX_DEBUGGING";
     41 
     42 // We need to tell whether we are performing a "normal" callback, or
     43 // whether we were called recursively from within a UnsafeTrap() callback.
     44 // This is a little tricky to do, because we need to somehow get access to
     45 // per-thread data from within a signal context. Normal TLS storage is not
     46 // safely accessible at this time. We could roll our own, but that involves
     47 // a lot of complexity. Instead, we co-opt one bit in the signal mask.
     48 // If BUS is blocked, we assume that we have been called recursively.
     49 // There is a possibility for collision with other code that needs to do
     50 // this, but in practice the risks are low.
     51 // If SIGBUS turns out to be a problem, we could instead co-opt one of the
     52 // realtime signals. There are plenty of them. Unfortunately, there is no
     53 // way to mark a signal as allocated. So, the potential for collision is
     54 // possibly even worse.
     55 bool GetIsInSigHandler(const ucontext_t* ctx) {
     56   // Note: on Android, sigismember does not take a pointer to const.
     57   return sigismember(const_cast<sigset_t*>(&ctx->uc_sigmask), LINUX_SIGBUS);
     58 }
     59 
     60 void SetIsInSigHandler() {
     61   sigset_t mask;
     62   if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGBUS) ||
     63       sandbox::sys_sigprocmask(LINUX_SIG_BLOCK, &mask, NULL)) {
     64     SANDBOX_DIE("Failed to block SIGBUS");
     65   }
     66 }
     67 
     68 bool IsDefaultSignalAction(const struct sigaction& sa) {
     69   if (sa.sa_flags & SA_SIGINFO || sa.sa_handler != SIG_DFL) {
     70     return false;
     71   }
     72   return true;
     73 }
     74 
     75 }  // namespace
     76 
     77 namespace sandbox {
     78 
     79 Trap::Trap()
     80     : trap_array_(NULL),
     81       trap_array_size_(0),
     82       trap_array_capacity_(0),
     83       has_unsafe_traps_(false) {
     84   // Set new SIGSYS handler
     85   struct sigaction sa = {};
     86   // In some toolchain, sa_sigaction is not declared in struct sigaction.
     87   // So, here cast the pointer to the sa_handler's type. This works because
     88   // |sa_handler| and |sa_sigaction| shares the same memory.
     89   sa.sa_handler = reinterpret_cast<void (*)(int)>(SigSysAction);
     90   sa.sa_flags = LINUX_SA_SIGINFO | LINUX_SA_NODEFER;
     91   struct sigaction old_sa = {};
     92   if (sys_sigaction(LINUX_SIGSYS, &sa, &old_sa) < 0) {
     93     SANDBOX_DIE("Failed to configure SIGSYS handler");
     94   }
     95 
     96   if (!IsDefaultSignalAction(old_sa)) {
     97     static const char kExistingSIGSYSMsg[] =
     98         "Existing signal handler when trying to install SIGSYS. SIGSYS needs "
     99         "to be reserved for seccomp-bpf.";
    100     DLOG(FATAL) << kExistingSIGSYSMsg;
    101     LOG(ERROR) << kExistingSIGSYSMsg;
    102   }
    103 
    104   // Unmask SIGSYS
    105   sigset_t mask;
    106   if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGSYS) ||
    107       sys_sigprocmask(LINUX_SIG_UNBLOCK, &mask, NULL)) {
    108     SANDBOX_DIE("Failed to configure SIGSYS handler");
    109   }
    110 }
    111 
    112 bpf_dsl::TrapRegistry* Trap::Registry() {
    113   // Note: This class is not thread safe. It is the caller's responsibility
    114   // to avoid race conditions. Normally, this is a non-issue as the sandbox
    115   // can only be initialized if there are no other threads present.
    116   // Also, this is not a normal singleton. Once created, the global trap
    117   // object must never be destroyed again.
    118   if (!global_trap_) {
    119     global_trap_ = new Trap();
    120     if (!global_trap_) {
    121       SANDBOX_DIE("Failed to allocate global trap handler");
    122     }
    123   }
    124   return global_trap_;
    125 }
    126 
    127 void Trap::SigSysAction(int nr, LinuxSigInfo* info, void* void_context) {
    128   if (info) {
    129     MSAN_UNPOISON(info, sizeof(*info));
    130   }
    131 
    132   // Obtain the signal context. This, most notably, gives us access to
    133   // all CPU registers at the time of the signal.
    134   ucontext_t* ctx = reinterpret_cast<ucontext_t*>(void_context);
    135   if (ctx) {
    136     MSAN_UNPOISON(ctx, sizeof(*ctx));
    137   }
    138 
    139   if (!global_trap_) {
    140     RAW_SANDBOX_DIE(
    141         "This can't happen. Found no global singleton instance "
    142         "for Trap() handling.");
    143   }
    144   global_trap_->SigSys(nr, info, ctx);
    145 }
    146 
    147 void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) {
    148   // Signal handlers should always preserve "errno". Otherwise, we could
    149   // trigger really subtle bugs.
    150   const int old_errno = errno;
    151 
    152   // Various sanity checks to make sure we actually received a signal
    153   // triggered by a BPF filter. If something else triggered SIGSYS
    154   // (e.g. kill()), there is really nothing we can do with this signal.
    155   if (nr != LINUX_SIGSYS || info->si_code != SYS_SECCOMP || !ctx ||
    156       info->si_errno <= 0 ||
    157       static_cast<size_t>(info->si_errno) > trap_array_size_) {
    158     // ATI drivers seem to send SIGSYS, so this cannot be FATAL.
    159     // See crbug.com/178166.
    160     // TODO(jln): add a DCHECK or move back to FATAL.
    161     RAW_LOG(ERROR, "Unexpected SIGSYS received.");
    162     errno = old_errno;
    163     return;
    164   }
    165 
    166 
    167   // Obtain the siginfo information that is specific to SIGSYS. Unfortunately,
    168   // most versions of glibc don't include this information in siginfo_t. So,
    169   // we need to explicitly copy it into a arch_sigsys structure.
    170   struct arch_sigsys sigsys;
    171   memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
    172 
    173 #if defined(__mips__)
    174   // When indirect syscall (syscall(__NR_foo, ...)) is made on Mips, the
    175   // number in register SECCOMP_SYSCALL(ctx) is always __NR_syscall and the
    176   // real number of a syscall (__NR_foo) is in SECCOMP_PARM1(ctx)
    177   bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) &&
    178                           sigsys.nr != static_cast<int>(SECCOMP_PARM1(ctx));
    179 #else
    180   bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx));
    181 #endif
    182 
    183   // Some more sanity checks.
    184   if (sigsys.ip != reinterpret_cast<void*>(SECCOMP_IP(ctx)) ||
    185       sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) {
    186     // TODO(markus):
    187     // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal
    188     // safe and can lead to bugs. We should eventually implement a different
    189     // logging and reporting mechanism that is safe to be called from
    190     // the sigSys() handler.
    191     RAW_SANDBOX_DIE("Sanity checks are failing after receiving SIGSYS.");
    192   }
    193 
    194   intptr_t rc;
    195   if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
    196     errno = old_errno;
    197     if (sigsys.nr == __NR_clone) {
    198       RAW_SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler.");
    199     }
    200 #if defined(__mips__)
    201     // Mips supports up to eight arguments for syscall.
    202     // However, seccomp bpf can filter only up to six arguments, so using eight
    203     // arguments has sense only when using UnsafeTrap() handler.
    204     rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
    205                        SECCOMP_PARM1(ctx),
    206                        SECCOMP_PARM2(ctx),
    207                        SECCOMP_PARM3(ctx),
    208                        SECCOMP_PARM4(ctx),
    209                        SECCOMP_PARM5(ctx),
    210                        SECCOMP_PARM6(ctx),
    211                        SECCOMP_PARM7(ctx),
    212                        SECCOMP_PARM8(ctx));
    213 #else
    214     rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
    215                        SECCOMP_PARM1(ctx),
    216                        SECCOMP_PARM2(ctx),
    217                        SECCOMP_PARM3(ctx),
    218                        SECCOMP_PARM4(ctx),
    219                        SECCOMP_PARM5(ctx),
    220                        SECCOMP_PARM6(ctx));
    221 #endif  // defined(__mips__)
    222   } else {
    223     const TrapKey& trap = trap_array_[info->si_errno - 1];
    224     if (!trap.safe) {
    225       SetIsInSigHandler();
    226     }
    227 
    228     // Copy the seccomp-specific data into a arch_seccomp_data structure. This
    229     // is what we are showing to TrapFnc callbacks that the system call
    230     // evaluator registered with the sandbox.
    231     struct arch_seccomp_data data = {
    232         static_cast<int>(SECCOMP_SYSCALL(ctx)),
    233         SECCOMP_ARCH,
    234         reinterpret_cast<uint64_t>(sigsys.ip),
    235         {static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
    236          static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
    237          static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
    238          static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
    239          static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
    240          static_cast<uint64_t>(SECCOMP_PARM6(ctx))}};
    241 
    242     // Now call the TrapFnc callback associated with this particular instance
    243     // of SECCOMP_RET_TRAP.
    244     rc = trap.fnc(data, const_cast<void*>(trap.aux));
    245   }
    246 
    247   // Update the CPU register that stores the return code of the system call
    248   // that we just handled, and restore "errno" to the value that it had
    249   // before entering the signal handler.
    250   Syscall::PutValueInUcontext(rc, ctx);
    251   errno = old_errno;
    252 
    253   return;
    254 }
    255 
    256 bool Trap::TrapKey::operator<(const TrapKey& o) const {
    257   return std::tie(fnc, aux, safe) < std::tie(o.fnc, o.aux, o.safe);
    258 }
    259 
    260 uint16_t Trap::Add(TrapFnc fnc, const void* aux, bool safe) {
    261   if (!safe && !SandboxDebuggingAllowedByUser()) {
    262     // Unless the user set the CHROME_SANDBOX_DEBUGGING environment variable,
    263     // we never return an ErrorCode that is marked as "unsafe". This also
    264     // means, the BPF compiler will never emit code that allow unsafe system
    265     // calls to by-pass the filter (because they use the magic return address
    266     // from Syscall::Call(-1)).
    267 
    268     // This SANDBOX_DIE() can optionally be removed. It won't break security,
    269     // but it might make error messages from the BPF compiler a little harder
    270     // to understand. Removing the SANDBOX_DIE() allows callers to easily check
    271     // whether unsafe traps are supported (by checking whether the returned
    272     // ErrorCode is ET_INVALID).
    273     SANDBOX_DIE(
    274         "Cannot use unsafe traps unless CHROME_SANDBOX_DEBUGGING "
    275         "is enabled");
    276 
    277     return 0;
    278   }
    279 
    280   // Each unique pair of TrapFnc and auxiliary data make up a distinct instance
    281   // of a SECCOMP_RET_TRAP.
    282   TrapKey key(fnc, aux, safe);
    283 
    284   // We return unique identifiers together with SECCOMP_RET_TRAP. This allows
    285   // us to associate trap with the appropriate handler. The kernel allows us
    286   // identifiers in the range from 0 to SECCOMP_RET_DATA (0xFFFF). We want to
    287   // avoid 0, as it could be confused for a trap without any specific id.
    288   // The nice thing about sequentially numbered identifiers is that we can also
    289   // trivially look them up from our signal handler without making any system
    290   // calls that might be async-signal-unsafe.
    291   // In order to do so, we store all of our traps in a C-style trap_array_.
    292 
    293   TrapIds::const_iterator iter = trap_ids_.find(key);
    294   if (iter != trap_ids_.end()) {
    295     // We have seen this pair before. Return the same id that we assigned
    296     // earlier.
    297     return iter->second;
    298   }
    299 
    300   // This is a new pair. Remember it and assign a new id.
    301   if (trap_array_size_ >= SECCOMP_RET_DATA /* 0xFFFF */ ||
    302       trap_array_size_ >= std::numeric_limits<uint16_t>::max()) {
    303     // In practice, this is pretty much impossible to trigger, as there
    304     // are other kernel limitations that restrict overall BPF program sizes.
    305     SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
    306   }
    307 
    308   // Our callers ensure that there are no other threads accessing trap_array_
    309   // concurrently (typically this is done by ensuring that we are single-
    310   // threaded while the sandbox is being set up). But we nonetheless are
    311   // modifying a live data structure that could be accessed any time a
    312   // system call is made; as system calls could be triggering SIGSYS.
    313   // So, we have to be extra careful that we update trap_array_ atomically.
    314   // In particular, this means we shouldn't be using realloc() to resize it.
    315   // Instead, we allocate a new array, copy the values, and then switch the
    316   // pointer. We only really care about the pointer being updated atomically
    317   // and the data that is pointed to being valid, as these are the only
    318   // values accessed from the signal handler. It is OK if trap_array_size_
    319   // is inconsistent with the pointer, as it is monotonously increasing.
    320   // Also, we only care about compiler barriers, as the signal handler is
    321   // triggered synchronously from a system call. We don't have to protect
    322   // against issues with the memory model or with completely asynchronous
    323   // events.
    324   if (trap_array_size_ >= trap_array_capacity_) {
    325     trap_array_capacity_ += kCapacityIncrement;
    326     TrapKey* old_trap_array = trap_array_;
    327     TrapKey* new_trap_array = new TrapKey[trap_array_capacity_];
    328     std::copy_n(old_trap_array, trap_array_size_, new_trap_array);
    329 
    330     // Language specs are unclear on whether the compiler is allowed to move
    331     // the "delete[]" above our preceding assignments and/or memory moves,
    332     // iff the compiler believes that "delete[]" doesn't have any other
    333     // global side-effects.
    334     // We insert optimization barriers to prevent this from happening.
    335     // The first barrier is probably not needed, but better be explicit in
    336     // what we want to tell the compiler.
    337     // The clang developer mailing list couldn't answer whether this is a
    338     // legitimate worry; but they at least thought that the barrier is
    339     // sufficient to prevent the (so far hypothetical) problem of re-ordering
    340     // of instructions by the compiler.
    341     //
    342     // TODO(mdempsky): Try to clean this up using base/atomicops or C++11
    343     // atomics; see crbug.com/414363.
    344     asm volatile("" : "=r"(new_trap_array) : "0"(new_trap_array) : "memory");
    345     trap_array_ = new_trap_array;
    346     asm volatile("" : "=r"(trap_array_) : "0"(trap_array_) : "memory");
    347 
    348     delete[] old_trap_array;
    349   }
    350 
    351   uint16_t id = trap_array_size_ + 1;
    352   trap_ids_[key] = id;
    353   trap_array_[trap_array_size_] = key;
    354   trap_array_size_++;
    355   return id;
    356 }
    357 
    358 bool Trap::SandboxDebuggingAllowedByUser() {
    359   const char* debug_flag = getenv(kSandboxDebuggingEnv);
    360   return debug_flag && *debug_flag;
    361 }
    362 
    363 bool Trap::EnableUnsafeTraps() {
    364   if (!has_unsafe_traps_) {
    365     // Unsafe traps are a one-way fuse. Once enabled, they can never be turned
    366     // off again.
    367     // We only allow enabling unsafe traps, if the user explicitly set an
    368     // appropriate environment variable. This prevents bugs that accidentally
    369     // disable all sandboxing for all users.
    370     if (SandboxDebuggingAllowedByUser()) {
    371       // We only ever print this message once, when we enable unsafe traps the
    372       // first time.
    373       SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");
    374       has_unsafe_traps_ = true;
    375     } else {
    376       SANDBOX_INFO(
    377           "Cannot disable sandbox and use unsafe traps unless "
    378           "CHROME_SANDBOX_DEBUGGING is turned on first");
    379     }
    380   }
    381   // Returns the, possibly updated, value of has_unsafe_traps_.
    382   return has_unsafe_traps_;
    383 }
    384 
    385 Trap* Trap::global_trap_;
    386 
    387 }  // namespace sandbox
    388