1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <errno.h> 6 7 #include <algorithm> 8 #include <set> 9 #include <vector> 10 11 #include "sandbox/linux/seccomp-bpf/codegen.h" 12 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 13 #include "sandbox/linux/tests/unit_tests.h" 14 15 namespace sandbox { 16 17 class SandboxUnittestHelper : public SandboxBPF { 18 public: 19 typedef SandboxBPF::Program Program; 20 }; 21 22 // We want to access some of the private methods in the code generator. We 23 // do so by defining a "friend" that makes these methods public for us. 24 class CodeGenUnittestHelper : public CodeGen { 25 public: 26 void FindBranchTargets(const Instruction& instructions, 27 BranchTargets *branch_targets) { 28 CodeGen::FindBranchTargets(instructions, branch_targets); 29 } 30 31 BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns, 32 const BranchTargets& branch_targets, 33 TargetsToBlocks *blocks) { 34 return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks); 35 } 36 37 void MergeTails(TargetsToBlocks *blocks) { 38 CodeGen::MergeTails(blocks); 39 } 40 }; 41 42 enum { NO_FLAGS = 0x0000, 43 HAS_MERGEABLE_TAILS = 0x0001, 44 }; 45 46 Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) { 47 // Create the most basic valid BPF program: 48 // RET ERR_ALLOWED 49 *flags = NO_FLAGS; 50 return codegen->MakeInstruction(BPF_RET+BPF_K, 51 ErrorCode(ErrorCode::ERR_ALLOWED)); 52 } 53 54 Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) { 55 // Create a program with a single branch: 56 // JUMP if eq 42 then $0 else $1 57 // 0: RET EPERM 58 // 1: RET ERR_ALLOWED 59 *flags = NO_FLAGS; 60 return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, 61 codegen->MakeInstruction(BPF_RET+BPF_K, 62 ErrorCode(EPERM)), 63 codegen->MakeInstruction(BPF_RET+BPF_K, 64 ErrorCode(ErrorCode::ERR_ALLOWED))); 65 } 66 67 Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) { 68 // Create a program with a single branch: 69 // JUMP if eq 42 then $0 else $0 70 // 0: RET ERR_ALLOWED 71 72 // N.B.: As the instructions in both sides of the branch are already 73 // the same object, we do not actually have any "mergeable" branches. 74 // This needs to be reflected in our choice of "flags". 75 *flags = NO_FLAGS; 76 77 Instruction *ret = 78 codegen->MakeInstruction(BPF_RET+BPF_K, 79 ErrorCode(ErrorCode::ERR_ALLOWED)); 80 return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret); 81 } 82 83 Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) { 84 // Creates a basic BPF program that we'll use to test some of the code: 85 // JUMP if eq 42 the $0 else $1 (insn6) 86 // 0: LD 23 (insn5) 87 // 1: JUMP if eq 42 then $2 else $4 (insn4) 88 // 2: JUMP to $3 (insn1) 89 // 3: LD 42 (insn0) 90 // RET ErrorCode(42) (insn2) 91 // 4: LD 42 (insn3) 92 // RET ErrorCode(42) (insn3+) 93 *flags = HAS_MERGEABLE_TAILS; 94 95 Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42); 96 SANDBOX_ASSERT(insn0); 97 SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS); 98 SANDBOX_ASSERT(insn0->k == 42); 99 SANDBOX_ASSERT(insn0->next == NULL); 100 101 Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0); 102 SANDBOX_ASSERT(insn1); 103 SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA); 104 SANDBOX_ASSERT(insn1->jt_ptr == insn0); 105 106 Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)); 107 SANDBOX_ASSERT(insn2); 108 SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K); 109 SANDBOX_ASSERT(insn2->next == NULL); 110 111 // We explicitly duplicate instructions so that MergeTails() can coalesce 112 // them later. 113 Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42, 114 codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42))); 115 116 Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, 117 insn1, insn3); 118 SANDBOX_ASSERT(insn4); 119 SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K); 120 SANDBOX_ASSERT(insn4->k == 42); 121 SANDBOX_ASSERT(insn4->jt_ptr == insn1); 122 SANDBOX_ASSERT(insn4->jf_ptr == insn3); 123 124 codegen->JoinInstructions(insn0, insn2); 125 SANDBOX_ASSERT(insn0->next == insn2); 126 127 Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 128 23, insn4); 129 SANDBOX_ASSERT(insn5); 130 SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS); 131 SANDBOX_ASSERT(insn5->k == 23); 132 SANDBOX_ASSERT(insn5->next == insn4); 133 134 // Force a basic block that ends in neither a jump instruction nor a return 135 // instruction. It only contains "insn5". This exercises one of the less 136 // common code paths in the topo-sort algorithm. 137 // This also gives us a diamond-shaped pattern in our graph, which stresses 138 // another aspect of the topo-sort algorithm (namely, the ability to 139 // correctly count the incoming branches for subtrees that are not disjunct). 140 Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, 141 insn5, insn4); 142 143 return insn6; 144 } 145 146 void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){ 147 Instruction *(*function_table[])(CodeGen *codegen, int *flags) = { 148 SampleProgramOneInstruction, 149 SampleProgramSimpleBranch, 150 SampleProgramAtypicalBranch, 151 SampleProgramComplex, 152 }; 153 154 for (size_t i = 0; i < arraysize(function_table); ++i) { 155 CodeGenUnittestHelper codegen; 156 int flags = NO_FLAGS; 157 Instruction *prg = function_table[i](&codegen, &flags); 158 test(&codegen, prg, flags); 159 } 160 } 161 162 void MakeInstruction(CodeGenUnittestHelper *codegen, 163 Instruction *program, int) { 164 // Nothing to do here 165 } 166 167 SANDBOX_TEST(CodeGen, MakeInstruction) { 168 ForAllPrograms(MakeInstruction); 169 } 170 171 void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) { 172 BranchTargets branch_targets; 173 codegen->FindBranchTargets(*prg, &branch_targets); 174 175 // Verifying the general properties that should be true for every 176 // well-formed BPF program. 177 // Perform a depth-first traversal of the BPF program an verify that all 178 // targets of BPF_JMP instructions are represented in the "branch_targets". 179 // At the same time, compute a set of both the branch targets and all the 180 // instructions in the program. 181 std::vector<Instruction *> stack; 182 std::set<Instruction *> all_instructions; 183 std::set<Instruction *> target_instructions; 184 BranchTargets::const_iterator end = branch_targets.end(); 185 for (Instruction *insn = prg;;) { 186 all_instructions.insert(insn); 187 if (BPF_CLASS(insn->code) == BPF_JMP) { 188 target_instructions.insert(insn->jt_ptr); 189 SANDBOX_ASSERT(insn->jt_ptr != NULL); 190 SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end); 191 if (BPF_OP(insn->code) != BPF_JA) { 192 target_instructions.insert(insn->jf_ptr); 193 SANDBOX_ASSERT(insn->jf_ptr != NULL); 194 SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end); 195 stack.push_back(insn->jf_ptr); 196 } 197 insn = insn->jt_ptr; 198 } else if (BPF_CLASS(insn->code) == BPF_RET) { 199 SANDBOX_ASSERT(insn->next == NULL); 200 if (stack.empty()) { 201 break; 202 } 203 insn = stack.back(); 204 stack.pop_back(); 205 } else { 206 SANDBOX_ASSERT(insn->next != NULL); 207 insn = insn->next; 208 } 209 } 210 SANDBOX_ASSERT(target_instructions.size() == branch_targets.size()); 211 212 // We can now subtract the set of the branch targets from the set of all 213 // instructions. This gives us a set with the instructions that nobody 214 // ever jumps to. Verify that they are no included in the 215 // "branch_targets" that FindBranchTargets() computed for us. 216 Instructions non_target_instructions(all_instructions.size() - 217 target_instructions.size()); 218 set_difference(all_instructions.begin(), all_instructions.end(), 219 target_instructions.begin(), target_instructions.end(), 220 non_target_instructions.begin()); 221 for (Instructions::const_iterator iter = non_target_instructions.begin(); 222 iter != non_target_instructions.end(); 223 ++iter) { 224 SANDBOX_ASSERT(branch_targets.find(*iter) == end); 225 } 226 } 227 228 SANDBOX_TEST(CodeGen, FindBranchTargets) { 229 ForAllPrograms(FindBranchTargets); 230 } 231 232 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen, 233 Instruction *prg, int) { 234 BranchTargets branch_targets; 235 codegen->FindBranchTargets(*prg, &branch_targets); 236 TargetsToBlocks all_blocks; 237 BasicBlock *first_block = 238 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); 239 SANDBOX_ASSERT(first_block != NULL); 240 SANDBOX_ASSERT(first_block->instructions.size() > 0); 241 Instruction *first_insn = first_block->instructions[0]; 242 243 // Basic blocks are supposed to start with a branch target and end with 244 // either a jump or a return instruction. It can also end, if the next 245 // instruction forms the beginning of a new basic block. There should be 246 // no other jumps or return instructions in the middle of a basic block. 247 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin(); 248 bb_iter != all_blocks.end(); 249 ++bb_iter) { 250 BasicBlock *bb = bb_iter->second; 251 SANDBOX_ASSERT(bb != NULL); 252 SANDBOX_ASSERT(bb->instructions.size() > 0); 253 Instruction *insn = bb->instructions[0]; 254 SANDBOX_ASSERT(insn == first_insn || 255 branch_targets.find(insn) != branch_targets.end()); 256 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){ 257 insn = *insn_iter; 258 if (++insn_iter != bb->instructions.end()) { 259 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP); 260 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET); 261 } else { 262 SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP || 263 BPF_CLASS(insn->code) == BPF_RET || 264 branch_targets.find(insn->next) != 265 branch_targets.end()); 266 break; 267 } 268 SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end()); 269 } 270 } 271 } 272 273 SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) { 274 ForAllPrograms(CutGraphIntoBasicBlocks); 275 } 276 277 void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg, 278 int flags) { 279 BranchTargets branch_targets; 280 codegen->FindBranchTargets(*prg, &branch_targets); 281 TargetsToBlocks all_blocks; 282 BasicBlock *first_block = 283 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); 284 285 // The shape of our graph and thus the function of our program should 286 // still be unchanged after we run MergeTails(). We verify this by 287 // serializing the graph and verifying that it is still the same. 288 // We also verify that at least some of the edges changed because of 289 // tail merging. 290 std::string graph[2]; 291 std::string edges[2]; 292 293 // The loop executes twice. After the first run, we call MergeTails() on 294 // our graph. 295 for (int i = 0;;) { 296 // Traverse the entire program in depth-first order. 297 std::vector<BasicBlock *> stack; 298 for (BasicBlock *bb = first_block;;) { 299 // Serialize the instructions in this basic block. In general, we only 300 // need to serialize "code" and "k"; except for a BPF_JA instruction 301 // where "k" isn't set. 302 // The stream of instructions should be unchanged after MergeTails(). 303 for (Instructions::const_iterator iter = bb->instructions.begin(); 304 iter != bb->instructions.end(); 305 ++iter) { 306 graph[i].append(reinterpret_cast<char *>(&(*iter)->code), 307 sizeof((*iter)->code)); 308 if (BPF_CLASS((*iter)->code) != BPF_JMP || 309 BPF_OP((*iter)->code) != BPF_JA) { 310 graph[i].append(reinterpret_cast<char *>(&(*iter)->k), 311 sizeof((*iter)->k)); 312 } 313 } 314 315 // Also serialize the addresses the basic blocks as we encounter them. 316 // This will change as basic blocks are coalesed by MergeTails(). 317 edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb)); 318 319 // Depth-first traversal of the graph. We only ever need to look at the 320 // very last instruction in the basic block, as that is the only one that 321 // can change code flow. 322 Instruction *insn = bb->instructions.back(); 323 if (BPF_CLASS(insn->code) == BPF_JMP) { 324 // For jump instructions, we need to remember the "false" branch while 325 // traversing the "true" branch. This is not necessary for BPF_JA which 326 // only has a single branch. 327 if (BPF_OP(insn->code) != BPF_JA) { 328 stack.push_back(all_blocks[insn->jf_ptr]); 329 } 330 bb = all_blocks[insn->jt_ptr]; 331 } else if (BPF_CLASS(insn->code) == BPF_RET) { 332 // After a BPF_RET, see if we need to back track. 333 if (stack.empty()) { 334 break; 335 } 336 bb = stack.back(); 337 stack.pop_back(); 338 } else { 339 // For "normal" instructions, just follow to the next basic block. 340 bb = all_blocks[insn->next]; 341 } 342 } 343 344 // Our loop runs exactly two times. 345 if (++i > 1) { 346 break; 347 } 348 codegen->MergeTails(&all_blocks); 349 } 350 SANDBOX_ASSERT(graph[0] == graph[1]); 351 if (flags & HAS_MERGEABLE_TAILS) { 352 SANDBOX_ASSERT(edges[0] != edges[1]); 353 } else { 354 SANDBOX_ASSERT(edges[0] == edges[1]); 355 } 356 } 357 358 SANDBOX_TEST(CodeGen, MergeTails) { 359 ForAllPrograms(MergeTails); 360 } 361 362 void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) { 363 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it 364 // detects a problem. Typically, if anything goes wrong, this looks to the 365 // TopoSort algorithm as if there had been cycles in the input data. 366 // This provides a pretty good unittest. 367 // We hand-crafted the program returned by SampleProgram() to exercise 368 // several of the more interesting code-paths. See comments in 369 // SampleProgram() for details. 370 // In addition to relying on the internal consistency checks in the compiler, 371 // we also serialize the graph and the resulting BPF program and compare 372 // them. With the exception of BPF_JA instructions that might have been 373 // inserted, both instruction streams should be equivalent. 374 // As Compile() modifies the instructions, we have to serialize the graph 375 // before calling Compile(). 376 std::string source; 377 Instructions source_stack; 378 for (const Instruction *insn = prg, *next; insn; insn = next) { 379 if (BPF_CLASS(insn->code) == BPF_JMP) { 380 if (BPF_OP(insn->code) == BPF_JA) { 381 // Do not serialize BPF_JA instructions (see above). 382 next = insn->jt_ptr; 383 continue; 384 } else { 385 source_stack.push_back(insn->jf_ptr); 386 next = insn->jt_ptr; 387 } 388 } else if (BPF_CLASS(insn->code) == BPF_RET) { 389 if (source_stack.empty()) { 390 next = NULL; 391 } else { 392 next = source_stack.back(); 393 source_stack.pop_back(); 394 } 395 } else { 396 next = insn->next; 397 } 398 // Only serialize "code" and "k". That's all the information we need to 399 // compare. The rest of the information is encoded in the order of 400 // instructions. 401 source.append(reinterpret_cast<const char *>(&insn->code), 402 sizeof(insn->code)); 403 source.append(reinterpret_cast<const char *>(&insn->k), 404 sizeof(insn->k)); 405 } 406 407 // Compile the program 408 SandboxUnittestHelper::Program bpf; 409 codegen->Compile(prg, &bpf); 410 411 // Serialize the resulting BPF instructions. 412 std::string assembly; 413 std::vector<int> assembly_stack; 414 for (int idx = 0; idx >= 0;) { 415 SANDBOX_ASSERT(idx < (int)bpf.size()); 416 struct sock_filter& insn = bpf[idx]; 417 if (BPF_CLASS(insn.code) == BPF_JMP) { 418 if (BPF_OP(insn.code) == BPF_JA) { 419 // Do not serialize BPF_JA instructions (see above). 420 idx += insn.k + 1; 421 continue; 422 } else { 423 assembly_stack.push_back(idx + insn.jf + 1); 424 idx += insn.jt + 1; 425 } 426 } else if (BPF_CLASS(insn.code) == BPF_RET) { 427 if (assembly_stack.empty()) { 428 idx = -1; 429 } else { 430 idx = assembly_stack.back(); 431 assembly_stack.pop_back(); 432 } 433 } else { 434 ++idx; 435 } 436 // Serialize the same information that we serialized before compilation. 437 assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code)); 438 assembly.append(reinterpret_cast<char *>(&insn.k), sizeof(insn.k)); 439 } 440 SANDBOX_ASSERT(source == assembly); 441 } 442 443 SANDBOX_TEST(CodeGen, All) { 444 ForAllPrograms(CompileAndCompare); 445 } 446 447 } // namespace sandbox 448