1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "sandbox/linux/bpf_dsl/policy_compiler.h" 6 7 #include <errno.h> 8 #include <stddef.h> 9 #include <stdint.h> 10 #include <sys/syscall.h> 11 12 #include <limits> 13 14 #include "base/logging.h" 15 #include "base/macros.h" 16 #include "sandbox/linux/bpf_dsl/bpf_dsl.h" 17 #include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h" 18 #include "sandbox/linux/bpf_dsl/codegen.h" 19 #include "sandbox/linux/bpf_dsl/policy.h" 20 #include "sandbox/linux/bpf_dsl/seccomp_macros.h" 21 #include "sandbox/linux/bpf_dsl/syscall_set.h" 22 #include "sandbox/linux/system_headers/linux_filter.h" 23 #include "sandbox/linux/system_headers/linux_seccomp.h" 24 #include "sandbox/linux/system_headers/linux_syscalls.h" 25 26 namespace sandbox { 27 namespace bpf_dsl { 28 29 namespace { 30 31 #if defined(__i386__) || defined(__x86_64__) 32 const bool kIsIntel = true; 33 #else 34 const bool kIsIntel = false; 35 #endif 36 #if defined(__x86_64__) && defined(__ILP32__) 37 const bool kIsX32 = true; 38 #else 39 const bool kIsX32 = false; 40 #endif 41 42 const int kSyscallsRequiredForUnsafeTraps[] = { 43 __NR_rt_sigprocmask, 44 __NR_rt_sigreturn, 45 #if defined(__NR_sigprocmask) 46 __NR_sigprocmask, 47 #endif 48 #if defined(__NR_sigreturn) 49 __NR_sigreturn, 50 #endif 51 }; 52 53 bool HasExactlyOneBit(uint64_t x) { 54 // Common trick; e.g., see http://stackoverflow.com/a/108329. 55 return x != 0 && (x & (x - 1)) == 0; 56 } 57 58 ResultExpr DefaultPanic(const char* error) { 59 return Kill(); 60 } 61 62 // A Trap() handler that returns an "errno" value. The value is encoded 63 // in the "aux" parameter. 64 intptr_t ReturnErrno(const struct arch_seccomp_data&, void* aux) { 65 // TrapFnc functions report error by following the native kernel convention 66 // of returning an exit code in the range of -1..-4096. They do not try to 67 // set errno themselves. The glibc wrapper that triggered the SIGSYS will 68 // ultimately do so for us. 69 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; 70 return -err; 71 } 72 73 bool HasUnsafeTraps(const Policy* policy) { 74 DCHECK(policy); 75 for (uint32_t sysnum : SyscallSet::ValidOnly()) { 76 if (policy->EvaluateSyscall(sysnum)->HasUnsafeTraps()) { 77 return true; 78 } 79 } 80 return policy->InvalidSyscall()->HasUnsafeTraps(); 81 } 82 83 } // namespace 84 85 struct PolicyCompiler::Range { 86 uint32_t from; 87 CodeGen::Node node; 88 }; 89 90 PolicyCompiler::PolicyCompiler(const Policy* policy, TrapRegistry* registry) 91 : policy_(policy), 92 registry_(registry), 93 escapepc_(0), 94 panic_func_(DefaultPanic), 95 gen_(), 96 has_unsafe_traps_(HasUnsafeTraps(policy_)) { 97 DCHECK(policy); 98 } 99 100 PolicyCompiler::~PolicyCompiler() { 101 } 102 103 CodeGen::Program PolicyCompiler::Compile() { 104 CHECK(policy_->InvalidSyscall()->IsDeny()) 105 << "Policies should deny invalid system calls"; 106 107 // If our BPF program has unsafe traps, enable support for them. 108 if (has_unsafe_traps_) { 109 CHECK_NE(0U, escapepc_) << "UnsafeTrap() requires a valid escape PC"; 110 111 for (int sysnum : kSyscallsRequiredForUnsafeTraps) { 112 CHECK(policy_->EvaluateSyscall(sysnum)->IsAllow()) 113 << "Policies that use UnsafeTrap() must unconditionally allow all " 114 "required system calls"; 115 } 116 117 CHECK(registry_->EnableUnsafeTraps()) 118 << "We'd rather die than enable unsafe traps"; 119 } 120 121 // Assemble the BPF filter program. 122 return gen_.Compile(AssemblePolicy()); 123 } 124 125 void PolicyCompiler::DangerousSetEscapePC(uint64_t escapepc) { 126 escapepc_ = escapepc; 127 } 128 129 void PolicyCompiler::SetPanicFunc(PanicFunc panic_func) { 130 panic_func_ = panic_func; 131 } 132 133 CodeGen::Node PolicyCompiler::AssemblePolicy() { 134 // A compiled policy consists of three logical parts: 135 // 1. Check that the "arch" field matches the expected architecture. 136 // 2. If the policy involves unsafe traps, check if the syscall was 137 // invoked by Syscall::Call, and then allow it unconditionally. 138 // 3. Check the system call number and jump to the appropriate compiled 139 // system call policy number. 140 return CheckArch(MaybeAddEscapeHatch(DispatchSyscall())); 141 } 142 143 CodeGen::Node PolicyCompiler::CheckArch(CodeGen::Node passed) { 144 // If the architecture doesn't match SECCOMP_ARCH, disallow the 145 // system call. 146 return gen_.MakeInstruction( 147 BPF_LD + BPF_W + BPF_ABS, SECCOMP_ARCH_IDX, 148 gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, SECCOMP_ARCH, passed, 149 CompileResult(panic_func_( 150 "Invalid audit architecture in BPF filter")))); 151 } 152 153 CodeGen::Node PolicyCompiler::MaybeAddEscapeHatch(CodeGen::Node rest) { 154 // If no unsafe traps, then simply return |rest|. 155 if (!has_unsafe_traps_) { 156 return rest; 157 } 158 159 // We already enabled unsafe traps in Compile, but enable them again to give 160 // the trap registry a second chance to complain before we add the backdoor. 161 CHECK(registry_->EnableUnsafeTraps()); 162 163 // Allow system calls, if they originate from our magic return address. 164 const uint32_t lopc = static_cast<uint32_t>(escapepc_); 165 const uint32_t hipc = static_cast<uint32_t>(escapepc_ >> 32); 166 167 // BPF cannot do native 64-bit comparisons, so we have to compare 168 // both 32-bit halves of the instruction pointer. If they match what 169 // we expect, we return ERR_ALLOWED. If either or both don't match, 170 // we continue evalutating the rest of the sandbox policy. 171 // 172 // For simplicity, we check the full 64-bit instruction pointer even 173 // on 32-bit architectures. 174 return gen_.MakeInstruction( 175 BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_LSB_IDX, 176 gen_.MakeInstruction( 177 BPF_JMP + BPF_JEQ + BPF_K, lopc, 178 gen_.MakeInstruction( 179 BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_MSB_IDX, 180 gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, hipc, 181 CompileResult(Allow()), rest)), 182 rest)); 183 } 184 185 CodeGen::Node PolicyCompiler::DispatchSyscall() { 186 // Evaluate all possible system calls and group their Nodes into 187 // ranges of identical codes. 188 Ranges ranges; 189 FindRanges(&ranges); 190 191 // Compile the system call ranges to an optimized BPF jumptable 192 CodeGen::Node jumptable = AssembleJumpTable(ranges.begin(), ranges.end()); 193 194 // Grab the system call number, so that we can check it and then 195 // execute the jump table. 196 return gen_.MakeInstruction( 197 BPF_LD + BPF_W + BPF_ABS, SECCOMP_NR_IDX, CheckSyscallNumber(jumptable)); 198 } 199 200 CodeGen::Node PolicyCompiler::CheckSyscallNumber(CodeGen::Node passed) { 201 if (kIsIntel) { 202 // On Intel architectures, verify that system call numbers are in the 203 // expected number range. 204 CodeGen::Node invalidX32 = 205 CompileResult(panic_func_("Illegal mixing of system call ABIs")); 206 if (kIsX32) { 207 // The newer x32 API always sets bit 30. 208 return gen_.MakeInstruction( 209 BPF_JMP + BPF_JSET + BPF_K, 0x40000000, passed, invalidX32); 210 } else { 211 // The older i386 and x86-64 APIs clear bit 30 on all system calls. 212 return gen_.MakeInstruction( 213 BPF_JMP + BPF_JSET + BPF_K, 0x40000000, invalidX32, passed); 214 } 215 } 216 217 // TODO(mdempsky): Similar validation for other architectures? 218 return passed; 219 } 220 221 void PolicyCompiler::FindRanges(Ranges* ranges) { 222 // Please note that "struct seccomp_data" defines system calls as a signed 223 // int32_t, but BPF instructions always operate on unsigned quantities. We 224 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, 225 // and then verifying that the rest of the number range (both positive and 226 // negative) all return the same Node. 227 const CodeGen::Node invalid_node = CompileResult(policy_->InvalidSyscall()); 228 uint32_t old_sysnum = 0; 229 CodeGen::Node old_node = 230 SyscallSet::IsValid(old_sysnum) 231 ? CompileResult(policy_->EvaluateSyscall(old_sysnum)) 232 : invalid_node; 233 234 for (uint32_t sysnum : SyscallSet::All()) { 235 CodeGen::Node node = 236 SyscallSet::IsValid(sysnum) 237 ? CompileResult(policy_->EvaluateSyscall(static_cast<int>(sysnum))) 238 : invalid_node; 239 // N.B., here we rely on CodeGen folding (i.e., returning the same 240 // node value for) identical code sequences, otherwise our jump 241 // table will blow up in size. 242 if (node != old_node) { 243 ranges->push_back(Range{old_sysnum, old_node}); 244 old_sysnum = sysnum; 245 old_node = node; 246 } 247 } 248 ranges->push_back(Range{old_sysnum, old_node}); 249 } 250 251 CodeGen::Node PolicyCompiler::AssembleJumpTable(Ranges::const_iterator start, 252 Ranges::const_iterator stop) { 253 // We convert the list of system call ranges into jump table that performs 254 // a binary search over the ranges. 255 // As a sanity check, we need to have at least one distinct ranges for us 256 // to be able to build a jump table. 257 CHECK(start < stop) << "Invalid iterator range"; 258 const auto n = stop - start; 259 if (n == 1) { 260 // If we have narrowed things down to a single range object, we can 261 // return from the BPF filter program. 262 return start->node; 263 } 264 265 // Pick the range object that is located at the mid point of our list. 266 // We compare our system call number against the lowest valid system call 267 // number in this range object. If our number is lower, it is outside of 268 // this range object. If it is greater or equal, it might be inside. 269 Ranges::const_iterator mid = start + n / 2; 270 271 // Sub-divide the list of ranges and continue recursively. 272 CodeGen::Node jf = AssembleJumpTable(start, mid); 273 CodeGen::Node jt = AssembleJumpTable(mid, stop); 274 return gen_.MakeInstruction(BPF_JMP + BPF_JGE + BPF_K, mid->from, jt, jf); 275 } 276 277 CodeGen::Node PolicyCompiler::CompileResult(const ResultExpr& res) { 278 return res->Compile(this); 279 } 280 281 CodeGen::Node PolicyCompiler::MaskedEqual(int argno, 282 size_t width, 283 uint64_t mask, 284 uint64_t value, 285 CodeGen::Node passed, 286 CodeGen::Node failed) { 287 // Sanity check that arguments make sense. 288 CHECK(argno >= 0 && argno < 6) << "Invalid argument number " << argno; 289 CHECK(width == 4 || width == 8) << "Invalid argument width " << width; 290 CHECK_NE(0U, mask) << "Zero mask is invalid"; 291 CHECK_EQ(value, value & mask) << "Value contains masked out bits"; 292 if (sizeof(void*) == 4) { 293 CHECK_EQ(4U, width) << "Invalid width on 32-bit platform"; 294 } 295 if (width == 4) { 296 CHECK_EQ(0U, mask >> 32) << "Mask exceeds argument size"; 297 CHECK_EQ(0U, value >> 32) << "Value exceeds argument size"; 298 } 299 300 // We want to emit code to check "(arg & mask) == value" where arg, mask, and 301 // value are 64-bit values, but the BPF machine is only 32-bit. We implement 302 // this by independently testing the upper and lower 32-bits and continuing to 303 // |passed| if both evaluate true, or to |failed| if either evaluate false. 304 return MaskedEqualHalf(argno, width, mask, value, ArgHalf::UPPER, 305 MaskedEqualHalf(argno, width, mask, value, 306 ArgHalf::LOWER, passed, failed), 307 failed); 308 } 309 310 CodeGen::Node PolicyCompiler::MaskedEqualHalf(int argno, 311 size_t width, 312 uint64_t full_mask, 313 uint64_t full_value, 314 ArgHalf half, 315 CodeGen::Node passed, 316 CodeGen::Node failed) { 317 if (width == 4 && half == ArgHalf::UPPER) { 318 // Special logic for sanity checking the upper 32-bits of 32-bit system 319 // call arguments. 320 321 // TODO(mdempsky): Compile Unexpected64bitArgument() just per program. 322 CodeGen::Node invalid_64bit = Unexpected64bitArgument(); 323 324 const uint32_t upper = SECCOMP_ARG_MSB_IDX(argno); 325 const uint32_t lower = SECCOMP_ARG_LSB_IDX(argno); 326 327 if (sizeof(void*) == 4) { 328 // On 32-bit platforms, the upper 32-bits should always be 0: 329 // LDW [upper] 330 // JEQ 0, passed, invalid 331 return gen_.MakeInstruction( 332 BPF_LD + BPF_W + BPF_ABS, 333 upper, 334 gen_.MakeInstruction( 335 BPF_JMP + BPF_JEQ + BPF_K, 0, passed, invalid_64bit)); 336 } 337 338 // On 64-bit platforms, the upper 32-bits may be 0 or ~0; but we only allow 339 // ~0 if the sign bit of the lower 32-bits is set too: 340 // LDW [upper] 341 // JEQ 0, passed, (next) 342 // JEQ ~0, (next), invalid 343 // LDW [lower] 344 // JSET (1<<31), passed, invalid 345 // 346 // TODO(mdempsky): The JSET instruction could perhaps jump to passed->next 347 // instead, as the first instruction of passed should be "LDW [lower]". 348 return gen_.MakeInstruction( 349 BPF_LD + BPF_W + BPF_ABS, 350 upper, 351 gen_.MakeInstruction( 352 BPF_JMP + BPF_JEQ + BPF_K, 353 0, 354 passed, 355 gen_.MakeInstruction( 356 BPF_JMP + BPF_JEQ + BPF_K, 357 std::numeric_limits<uint32_t>::max(), 358 gen_.MakeInstruction( 359 BPF_LD + BPF_W + BPF_ABS, 360 lower, 361 gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 362 1U << 31, 363 passed, 364 invalid_64bit)), 365 invalid_64bit))); 366 } 367 368 const uint32_t idx = (half == ArgHalf::UPPER) ? SECCOMP_ARG_MSB_IDX(argno) 369 : SECCOMP_ARG_LSB_IDX(argno); 370 const uint32_t mask = (half == ArgHalf::UPPER) ? full_mask >> 32 : full_mask; 371 const uint32_t value = 372 (half == ArgHalf::UPPER) ? full_value >> 32 : full_value; 373 374 // Emit a suitable instruction sequence for (arg & mask) == value. 375 376 // For (arg & 0) == 0, just return passed. 377 if (mask == 0) { 378 CHECK_EQ(0U, value); 379 return passed; 380 } 381 382 // For (arg & ~0) == value, emit: 383 // LDW [idx] 384 // JEQ value, passed, failed 385 if (mask == std::numeric_limits<uint32_t>::max()) { 386 return gen_.MakeInstruction( 387 BPF_LD + BPF_W + BPF_ABS, 388 idx, 389 gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed)); 390 } 391 392 // For (arg & mask) == 0, emit: 393 // LDW [idx] 394 // JSET mask, failed, passed 395 // (Note: failed and passed are intentionally swapped.) 396 if (value == 0) { 397 return gen_.MakeInstruction( 398 BPF_LD + BPF_W + BPF_ABS, 399 idx, 400 gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, failed, passed)); 401 } 402 403 // For (arg & x) == x where x is a single-bit value, emit: 404 // LDW [idx] 405 // JSET mask, passed, failed 406 if (mask == value && HasExactlyOneBit(mask)) { 407 return gen_.MakeInstruction( 408 BPF_LD + BPF_W + BPF_ABS, 409 idx, 410 gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, passed, failed)); 411 } 412 413 // Generic fallback: 414 // LDW [idx] 415 // AND mask 416 // JEQ value, passed, failed 417 return gen_.MakeInstruction( 418 BPF_LD + BPF_W + BPF_ABS, 419 idx, 420 gen_.MakeInstruction( 421 BPF_ALU + BPF_AND + BPF_K, 422 mask, 423 gen_.MakeInstruction( 424 BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed))); 425 } 426 427 CodeGen::Node PolicyCompiler::Unexpected64bitArgument() { 428 return CompileResult(panic_func_("Unexpected 64bit argument detected")); 429 } 430 431 CodeGen::Node PolicyCompiler::Return(uint32_t ret) { 432 if (has_unsafe_traps_ && (ret & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { 433 // When inside an UnsafeTrap() callback, we want to allow all system calls. 434 // This means, we must conditionally disable the sandbox -- and that's not 435 // something that kernel-side BPF filters can do, as they cannot inspect 436 // any state other than the syscall arguments. 437 // But if we redirect all error handlers to user-space, then we can easily 438 // make this decision. 439 // The performance penalty for this extra round-trip to user-space is not 440 // actually that bad, as we only ever pay it for denied system calls; and a 441 // typical program has very few of these. 442 return Trap(ReturnErrno, reinterpret_cast<void*>(ret & SECCOMP_RET_DATA), 443 true); 444 } 445 446 return gen_.MakeInstruction(BPF_RET + BPF_K, ret); 447 } 448 449 CodeGen::Node PolicyCompiler::Trap(TrapRegistry::TrapFnc fnc, 450 const void* aux, 451 bool safe) { 452 uint16_t trap_id = registry_->Add(fnc, aux, safe); 453 return gen_.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_TRAP + trap_id); 454 } 455 456 bool PolicyCompiler::IsRequiredForUnsafeTrap(int sysno) { 457 for (size_t i = 0; i < arraysize(kSyscallsRequiredForUnsafeTraps); ++i) { 458 if (sysno == kSyscallsRequiredForUnsafeTraps[i]) { 459 return true; 460 } 461 } 462 return false; 463 } 464 465 } // namespace bpf_dsl 466 } // namespace sandbox 467