1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "sandbox/linux/seccomp-bpf/trap.h" 6 7 #include <errno.h> 8 #include <signal.h> 9 #include <stddef.h> 10 #include <stdint.h> 11 #include <string.h> 12 #include <sys/syscall.h> 13 14 #include <algorithm> 15 #include <limits> 16 #include <tuple> 17 18 #include "base/compiler_specific.h" 19 #include "base/logging.h" 20 #include "build/build_config.h" 21 #include "sandbox/linux/bpf_dsl/seccomp_macros.h" 22 #include "sandbox/linux/seccomp-bpf/die.h" 23 #include "sandbox/linux/seccomp-bpf/syscall.h" 24 #include "sandbox/linux/services/syscall_wrappers.h" 25 #include "sandbox/linux/system_headers/linux_seccomp.h" 26 #include "sandbox/linux/system_headers/linux_signal.h" 27 28 namespace { 29 30 struct arch_sigsys { 31 void* ip; 32 int nr; 33 unsigned int arch; 34 }; 35 36 const int kCapacityIncrement = 20; 37 38 // Unsafe traps can only be turned on, if the user explicitly allowed them 39 // by setting the CHROME_SANDBOX_DEBUGGING environment variable. 40 const char kSandboxDebuggingEnv[] = "CHROME_SANDBOX_DEBUGGING"; 41 42 // We need to tell whether we are performing a "normal" callback, or 43 // whether we were called recursively from within a UnsafeTrap() callback. 44 // This is a little tricky to do, because we need to somehow get access to 45 // per-thread data from within a signal context. Normal TLS storage is not 46 // safely accessible at this time. We could roll our own, but that involves 47 // a lot of complexity. Instead, we co-opt one bit in the signal mask. 48 // If BUS is blocked, we assume that we have been called recursively. 49 // There is a possibility for collision with other code that needs to do 50 // this, but in practice the risks are low. 51 // If SIGBUS turns out to be a problem, we could instead co-opt one of the 52 // realtime signals. There are plenty of them. Unfortunately, there is no 53 // way to mark a signal as allocated. So, the potential for collision is 54 // possibly even worse. 55 bool GetIsInSigHandler(const ucontext_t* ctx) { 56 // Note: on Android, sigismember does not take a pointer to const. 57 return sigismember(const_cast<sigset_t*>(&ctx->uc_sigmask), LINUX_SIGBUS); 58 } 59 60 void SetIsInSigHandler() { 61 sigset_t mask; 62 if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGBUS) || 63 sandbox::sys_sigprocmask(LINUX_SIG_BLOCK, &mask, NULL)) { 64 SANDBOX_DIE("Failed to block SIGBUS"); 65 } 66 } 67 68 bool IsDefaultSignalAction(const struct sigaction& sa) { 69 if (sa.sa_flags & SA_SIGINFO || sa.sa_handler != SIG_DFL) { 70 return false; 71 } 72 return true; 73 } 74 75 } // namespace 76 77 namespace sandbox { 78 79 Trap::Trap() 80 : trap_array_(NULL), 81 trap_array_size_(0), 82 trap_array_capacity_(0), 83 has_unsafe_traps_(false) { 84 // Set new SIGSYS handler 85 struct sigaction sa = {}; 86 // In some toolchain, sa_sigaction is not declared in struct sigaction. 87 // So, here cast the pointer to the sa_handler's type. This works because 88 // |sa_handler| and |sa_sigaction| shares the same memory. 89 sa.sa_handler = reinterpret_cast<void (*)(int)>(SigSysAction); 90 sa.sa_flags = LINUX_SA_SIGINFO | LINUX_SA_NODEFER; 91 struct sigaction old_sa = {}; 92 if (sys_sigaction(LINUX_SIGSYS, &sa, &old_sa) < 0) { 93 SANDBOX_DIE("Failed to configure SIGSYS handler"); 94 } 95 96 if (!IsDefaultSignalAction(old_sa)) { 97 static const char kExistingSIGSYSMsg[] = 98 "Existing signal handler when trying to install SIGSYS. SIGSYS needs " 99 "to be reserved for seccomp-bpf."; 100 DLOG(FATAL) << kExistingSIGSYSMsg; 101 LOG(ERROR) << kExistingSIGSYSMsg; 102 } 103 104 // Unmask SIGSYS 105 sigset_t mask; 106 if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGSYS) || 107 sys_sigprocmask(LINUX_SIG_UNBLOCK, &mask, NULL)) { 108 SANDBOX_DIE("Failed to configure SIGSYS handler"); 109 } 110 } 111 112 bpf_dsl::TrapRegistry* Trap::Registry() { 113 // Note: This class is not thread safe. It is the caller's responsibility 114 // to avoid race conditions. Normally, this is a non-issue as the sandbox 115 // can only be initialized if there are no other threads present. 116 // Also, this is not a normal singleton. Once created, the global trap 117 // object must never be destroyed again. 118 if (!global_trap_) { 119 global_trap_ = new Trap(); 120 if (!global_trap_) { 121 SANDBOX_DIE("Failed to allocate global trap handler"); 122 } 123 } 124 return global_trap_; 125 } 126 127 void Trap::SigSysAction(int nr, LinuxSigInfo* info, void* void_context) { 128 if (info) { 129 MSAN_UNPOISON(info, sizeof(*info)); 130 } 131 132 // Obtain the signal context. This, most notably, gives us access to 133 // all CPU registers at the time of the signal. 134 ucontext_t* ctx = reinterpret_cast<ucontext_t*>(void_context); 135 if (ctx) { 136 MSAN_UNPOISON(ctx, sizeof(*ctx)); 137 } 138 139 if (!global_trap_) { 140 RAW_SANDBOX_DIE( 141 "This can't happen. Found no global singleton instance " 142 "for Trap() handling."); 143 } 144 global_trap_->SigSys(nr, info, ctx); 145 } 146 147 void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) { 148 // Signal handlers should always preserve "errno". Otherwise, we could 149 // trigger really subtle bugs. 150 const int old_errno = errno; 151 152 // Various sanity checks to make sure we actually received a signal 153 // triggered by a BPF filter. If something else triggered SIGSYS 154 // (e.g. kill()), there is really nothing we can do with this signal. 155 if (nr != LINUX_SIGSYS || info->si_code != SYS_SECCOMP || !ctx || 156 info->si_errno <= 0 || 157 static_cast<size_t>(info->si_errno) > trap_array_size_) { 158 // ATI drivers seem to send SIGSYS, so this cannot be FATAL. 159 // See crbug.com/178166. 160 // TODO(jln): add a DCHECK or move back to FATAL. 161 RAW_LOG(ERROR, "Unexpected SIGSYS received."); 162 errno = old_errno; 163 return; 164 } 165 166 167 // Obtain the siginfo information that is specific to SIGSYS. Unfortunately, 168 // most versions of glibc don't include this information in siginfo_t. So, 169 // we need to explicitly copy it into a arch_sigsys structure. 170 struct arch_sigsys sigsys; 171 memcpy(&sigsys, &info->_sifields, sizeof(sigsys)); 172 173 #if defined(__mips__) 174 // When indirect syscall (syscall(__NR_foo, ...)) is made on Mips, the 175 // number in register SECCOMP_SYSCALL(ctx) is always __NR_syscall and the 176 // real number of a syscall (__NR_foo) is in SECCOMP_PARM1(ctx) 177 bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) && 178 sigsys.nr != static_cast<int>(SECCOMP_PARM1(ctx)); 179 #else 180 bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)); 181 #endif 182 183 // Some more sanity checks. 184 if (sigsys.ip != reinterpret_cast<void*>(SECCOMP_IP(ctx)) || 185 sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) { 186 // TODO(markus): 187 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal 188 // safe and can lead to bugs. We should eventually implement a different 189 // logging and reporting mechanism that is safe to be called from 190 // the sigSys() handler. 191 RAW_SANDBOX_DIE("Sanity checks are failing after receiving SIGSYS."); 192 } 193 194 intptr_t rc; 195 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { 196 errno = old_errno; 197 if (sigsys.nr == __NR_clone) { 198 RAW_SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler."); 199 } 200 #if defined(__mips__) 201 // Mips supports up to eight arguments for syscall. 202 // However, seccomp bpf can filter only up to six arguments, so using eight 203 // arguments has sense only when using UnsafeTrap() handler. 204 rc = Syscall::Call(SECCOMP_SYSCALL(ctx), 205 SECCOMP_PARM1(ctx), 206 SECCOMP_PARM2(ctx), 207 SECCOMP_PARM3(ctx), 208 SECCOMP_PARM4(ctx), 209 SECCOMP_PARM5(ctx), 210 SECCOMP_PARM6(ctx), 211 SECCOMP_PARM7(ctx), 212 SECCOMP_PARM8(ctx)); 213 #else 214 rc = Syscall::Call(SECCOMP_SYSCALL(ctx), 215 SECCOMP_PARM1(ctx), 216 SECCOMP_PARM2(ctx), 217 SECCOMP_PARM3(ctx), 218 SECCOMP_PARM4(ctx), 219 SECCOMP_PARM5(ctx), 220 SECCOMP_PARM6(ctx)); 221 #endif // defined(__mips__) 222 } else { 223 const TrapKey& trap = trap_array_[info->si_errno - 1]; 224 if (!trap.safe) { 225 SetIsInSigHandler(); 226 } 227 228 // Copy the seccomp-specific data into a arch_seccomp_data structure. This 229 // is what we are showing to TrapFnc callbacks that the system call 230 // evaluator registered with the sandbox. 231 struct arch_seccomp_data data = { 232 static_cast<int>(SECCOMP_SYSCALL(ctx)), 233 SECCOMP_ARCH, 234 reinterpret_cast<uint64_t>(sigsys.ip), 235 {static_cast<uint64_t>(SECCOMP_PARM1(ctx)), 236 static_cast<uint64_t>(SECCOMP_PARM2(ctx)), 237 static_cast<uint64_t>(SECCOMP_PARM3(ctx)), 238 static_cast<uint64_t>(SECCOMP_PARM4(ctx)), 239 static_cast<uint64_t>(SECCOMP_PARM5(ctx)), 240 static_cast<uint64_t>(SECCOMP_PARM6(ctx))}}; 241 242 // Now call the TrapFnc callback associated with this particular instance 243 // of SECCOMP_RET_TRAP. 244 rc = trap.fnc(data, const_cast<void*>(trap.aux)); 245 } 246 247 // Update the CPU register that stores the return code of the system call 248 // that we just handled, and restore "errno" to the value that it had 249 // before entering the signal handler. 250 Syscall::PutValueInUcontext(rc, ctx); 251 errno = old_errno; 252 253 return; 254 } 255 256 bool Trap::TrapKey::operator<(const TrapKey& o) const { 257 return std::tie(fnc, aux, safe) < std::tie(o.fnc, o.aux, o.safe); 258 } 259 260 uint16_t Trap::Add(TrapFnc fnc, const void* aux, bool safe) { 261 if (!safe && !SandboxDebuggingAllowedByUser()) { 262 // Unless the user set the CHROME_SANDBOX_DEBUGGING environment variable, 263 // we never return an ErrorCode that is marked as "unsafe". This also 264 // means, the BPF compiler will never emit code that allow unsafe system 265 // calls to by-pass the filter (because they use the magic return address 266 // from Syscall::Call(-1)). 267 268 // This SANDBOX_DIE() can optionally be removed. It won't break security, 269 // but it might make error messages from the BPF compiler a little harder 270 // to understand. Removing the SANDBOX_DIE() allows callers to easily check 271 // whether unsafe traps are supported (by checking whether the returned 272 // ErrorCode is ET_INVALID). 273 SANDBOX_DIE( 274 "Cannot use unsafe traps unless CHROME_SANDBOX_DEBUGGING " 275 "is enabled"); 276 277 return 0; 278 } 279 280 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance 281 // of a SECCOMP_RET_TRAP. 282 TrapKey key(fnc, aux, safe); 283 284 // We return unique identifiers together with SECCOMP_RET_TRAP. This allows 285 // us to associate trap with the appropriate handler. The kernel allows us 286 // identifiers in the range from 0 to SECCOMP_RET_DATA (0xFFFF). We want to 287 // avoid 0, as it could be confused for a trap without any specific id. 288 // The nice thing about sequentially numbered identifiers is that we can also 289 // trivially look them up from our signal handler without making any system 290 // calls that might be async-signal-unsafe. 291 // In order to do so, we store all of our traps in a C-style trap_array_. 292 293 TrapIds::const_iterator iter = trap_ids_.find(key); 294 if (iter != trap_ids_.end()) { 295 // We have seen this pair before. Return the same id that we assigned 296 // earlier. 297 return iter->second; 298 } 299 300 // This is a new pair. Remember it and assign a new id. 301 if (trap_array_size_ >= SECCOMP_RET_DATA /* 0xFFFF */ || 302 trap_array_size_ >= std::numeric_limits<uint16_t>::max()) { 303 // In practice, this is pretty much impossible to trigger, as there 304 // are other kernel limitations that restrict overall BPF program sizes. 305 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); 306 } 307 308 // Our callers ensure that there are no other threads accessing trap_array_ 309 // concurrently (typically this is done by ensuring that we are single- 310 // threaded while the sandbox is being set up). But we nonetheless are 311 // modifying a live data structure that could be accessed any time a 312 // system call is made; as system calls could be triggering SIGSYS. 313 // So, we have to be extra careful that we update trap_array_ atomically. 314 // In particular, this means we shouldn't be using realloc() to resize it. 315 // Instead, we allocate a new array, copy the values, and then switch the 316 // pointer. We only really care about the pointer being updated atomically 317 // and the data that is pointed to being valid, as these are the only 318 // values accessed from the signal handler. It is OK if trap_array_size_ 319 // is inconsistent with the pointer, as it is monotonously increasing. 320 // Also, we only care about compiler barriers, as the signal handler is 321 // triggered synchronously from a system call. We don't have to protect 322 // against issues with the memory model or with completely asynchronous 323 // events. 324 if (trap_array_size_ >= trap_array_capacity_) { 325 trap_array_capacity_ += kCapacityIncrement; 326 TrapKey* old_trap_array = trap_array_; 327 TrapKey* new_trap_array = new TrapKey[trap_array_capacity_]; 328 std::copy_n(old_trap_array, trap_array_size_, new_trap_array); 329 330 // Language specs are unclear on whether the compiler is allowed to move 331 // the "delete[]" above our preceding assignments and/or memory moves, 332 // iff the compiler believes that "delete[]" doesn't have any other 333 // global side-effects. 334 // We insert optimization barriers to prevent this from happening. 335 // The first barrier is probably not needed, but better be explicit in 336 // what we want to tell the compiler. 337 // The clang developer mailing list couldn't answer whether this is a 338 // legitimate worry; but they at least thought that the barrier is 339 // sufficient to prevent the (so far hypothetical) problem of re-ordering 340 // of instructions by the compiler. 341 // 342 // TODO(mdempsky): Try to clean this up using base/atomicops or C++11 343 // atomics; see crbug.com/414363. 344 asm volatile("" : "=r"(new_trap_array) : "0"(new_trap_array) : "memory"); 345 trap_array_ = new_trap_array; 346 asm volatile("" : "=r"(trap_array_) : "0"(trap_array_) : "memory"); 347 348 delete[] old_trap_array; 349 } 350 351 uint16_t id = trap_array_size_ + 1; 352 trap_ids_[key] = id; 353 trap_array_[trap_array_size_] = key; 354 trap_array_size_++; 355 return id; 356 } 357 358 bool Trap::SandboxDebuggingAllowedByUser() { 359 const char* debug_flag = getenv(kSandboxDebuggingEnv); 360 return debug_flag && *debug_flag; 361 } 362 363 bool Trap::EnableUnsafeTraps() { 364 if (!has_unsafe_traps_) { 365 // Unsafe traps are a one-way fuse. Once enabled, they can never be turned 366 // off again. 367 // We only allow enabling unsafe traps, if the user explicitly set an 368 // appropriate environment variable. This prevents bugs that accidentally 369 // disable all sandboxing for all users. 370 if (SandboxDebuggingAllowedByUser()) { 371 // We only ever print this message once, when we enable unsafe traps the 372 // first time. 373 SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes"); 374 has_unsafe_traps_ = true; 375 } else { 376 SANDBOX_INFO( 377 "Cannot disable sandbox and use unsafe traps unless " 378 "CHROME_SANDBOX_DEBUGGING is turned on first"); 379 } 380 } 381 // Returns the, possibly updated, value of has_unsafe_traps_. 382 return has_unsafe_traps_; 383 } 384 385 Trap* Trap::global_trap_; 386 387 } // namespace sandbox 388