1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <algorithm> 6 #include <set> 7 #include <vector> 8 9 #include "sandbox/linux/seccomp-bpf/codegen.h" 10 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 11 #include "sandbox/linux/tests/unit_tests.h" 12 13 namespace playground2 { 14 15 class SandboxUnittestHelper : public Sandbox { 16 public: 17 typedef Sandbox::Program Program; 18 }; 19 20 // We want to access some of the private methods in the code generator. We 21 // do so by defining a "friend" that makes these methods public for us. 22 class CodeGenUnittestHelper : public CodeGen { 23 public: 24 void FindBranchTargets(const Instruction& instructions, 25 BranchTargets *branch_targets) { 26 CodeGen::FindBranchTargets(instructions, branch_targets); 27 } 28 29 BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns, 30 const BranchTargets& branch_targets, 31 TargetsToBlocks *blocks) { 32 return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks); 33 } 34 35 void MergeTails(TargetsToBlocks *blocks) { 36 CodeGen::MergeTails(blocks); 37 } 38 }; 39 40 enum { NO_FLAGS = 0x0000, 41 HAS_MERGEABLE_TAILS = 0x0001, 42 }; 43 44 Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) { 45 // Create the most basic valid BPF program: 46 // RET ERR_ALLOWED 47 *flags = NO_FLAGS; 48 return codegen->MakeInstruction(BPF_RET+BPF_K, 49 ErrorCode(ErrorCode::ERR_ALLOWED)); 50 } 51 52 Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) { 53 // Create a program with a single branch: 54 // JUMP if eq 42 then $0 else $1 55 // 0: RET EPERM 56 // 1: RET ERR_ALLOWED 57 *flags = NO_FLAGS; 58 return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, 59 codegen->MakeInstruction(BPF_RET+BPF_K, 60 ErrorCode(EPERM)), 61 codegen->MakeInstruction(BPF_RET+BPF_K, 62 ErrorCode(ErrorCode::ERR_ALLOWED))); 63 } 64 65 Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) { 66 // Create a program with a single branch: 67 // JUMP if eq 42 then $0 else $0 68 // 0: RET ERR_ALLOWED 69 70 // N.B.: As the instructions in both sides of the branch are already 71 // the same object, we do not actually have any "mergeable" branches. 72 // This needs to be reflected in our choice of "flags". 73 *flags = NO_FLAGS; 74 75 Instruction *ret = 76 codegen->MakeInstruction(BPF_RET+BPF_K, 77 ErrorCode(ErrorCode::ERR_ALLOWED)); 78 return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret); 79 } 80 81 Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) { 82 // Creates a basic BPF program that we'll use to test some of the code: 83 // JUMP if eq 42 the $0 else $1 (insn6) 84 // 0: LD 23 (insn5) 85 // 1: JUMP if eq 42 then $2 else $4 (insn4) 86 // 2: JUMP to $3 (insn1) 87 // 3: LD 42 (insn0) 88 // RET ErrorCode(42) (insn2) 89 // 4: LD 42 (insn3) 90 // RET ErrorCode(42) (insn3+) 91 *flags = HAS_MERGEABLE_TAILS; 92 93 Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42); 94 SANDBOX_ASSERT(insn0); 95 SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS); 96 SANDBOX_ASSERT(insn0->k == 42); 97 SANDBOX_ASSERT(insn0->next == NULL); 98 99 Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0); 100 SANDBOX_ASSERT(insn1); 101 SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA); 102 SANDBOX_ASSERT(insn1->jt_ptr == insn0); 103 104 Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)); 105 SANDBOX_ASSERT(insn2); 106 SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K); 107 SANDBOX_ASSERT(insn2->next == NULL); 108 109 // We explicitly duplicate instructions so that MergeTails() can coalesce 110 // them later. 111 Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42, 112 codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42))); 113 114 Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, 115 insn1, insn3); 116 SANDBOX_ASSERT(insn4); 117 SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K); 118 SANDBOX_ASSERT(insn4->k == 42); 119 SANDBOX_ASSERT(insn4->jt_ptr == insn1); 120 SANDBOX_ASSERT(insn4->jf_ptr == insn3); 121 122 codegen->JoinInstructions(insn0, insn2); 123 SANDBOX_ASSERT(insn0->next == insn2); 124 125 Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 126 23, insn4); 127 SANDBOX_ASSERT(insn5); 128 SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS); 129 SANDBOX_ASSERT(insn5->k == 23); 130 SANDBOX_ASSERT(insn5->next == insn4); 131 132 // Force a basic block that ends in neither a jump instruction nor a return 133 // instruction. It only contains "insn5". This exercises one of the less 134 // common code paths in the topo-sort algorithm. 135 // This also gives us a diamond-shaped pattern in our graph, which stresses 136 // another aspect of the topo-sort algorithm (namely, the ability to 137 // correctly count the incoming branches for subtrees that are not disjunct). 138 Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, 139 insn5, insn4); 140 141 return insn6; 142 } 143 144 void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){ 145 Instruction *(*function_table[])(CodeGen *codegen, int *flags) = { 146 SampleProgramOneInstruction, 147 SampleProgramSimpleBranch, 148 SampleProgramAtypicalBranch, 149 SampleProgramComplex, 150 }; 151 152 for (size_t i = 0; i < arraysize(function_table); ++i) { 153 CodeGenUnittestHelper codegen; 154 int flags = NO_FLAGS; 155 Instruction *prg = function_table[i](&codegen, &flags); 156 test(&codegen, prg, flags); 157 } 158 } 159 160 void MakeInstruction(CodeGenUnittestHelper *codegen, 161 Instruction *program, int) { 162 // Nothing to do here 163 } 164 165 SANDBOX_TEST(CodeGen, MakeInstruction) { 166 ForAllPrograms(MakeInstruction); 167 } 168 169 void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) { 170 BranchTargets branch_targets; 171 codegen->FindBranchTargets(*prg, &branch_targets); 172 173 // Verifying the general properties that should be true for every 174 // well-formed BPF program. 175 // Perform a depth-first traversal of the BPF program an verify that all 176 // targets of BPF_JMP instructions are represented in the "branch_targets". 177 // At the same time, compute a set of both the branch targets and all the 178 // instructions in the program. 179 std::vector<Instruction *> stack; 180 std::set<Instruction *> all_instructions; 181 std::set<Instruction *> target_instructions; 182 BranchTargets::const_iterator end = branch_targets.end(); 183 for (Instruction *insn = prg;;) { 184 all_instructions.insert(insn); 185 if (BPF_CLASS(insn->code) == BPF_JMP) { 186 target_instructions.insert(insn->jt_ptr); 187 SANDBOX_ASSERT(insn->jt_ptr != NULL); 188 SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end); 189 if (BPF_OP(insn->code) != BPF_JA) { 190 target_instructions.insert(insn->jf_ptr); 191 SANDBOX_ASSERT(insn->jf_ptr != NULL); 192 SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end); 193 stack.push_back(insn->jf_ptr); 194 } 195 insn = insn->jt_ptr; 196 } else if (BPF_CLASS(insn->code) == BPF_RET) { 197 SANDBOX_ASSERT(insn->next == NULL); 198 if (stack.empty()) { 199 break; 200 } 201 insn = stack.back(); 202 stack.pop_back(); 203 } else { 204 SANDBOX_ASSERT(insn->next != NULL); 205 insn = insn->next; 206 } 207 } 208 SANDBOX_ASSERT(target_instructions.size() == branch_targets.size()); 209 210 // We can now subtract the set of the branch targets from the set of all 211 // instructions. This gives us a set with the instructions that nobody 212 // ever jumps to. Verify that they are no included in the 213 // "branch_targets" that FindBranchTargets() computed for us. 214 Instructions non_target_instructions(all_instructions.size() - 215 target_instructions.size()); 216 set_difference(all_instructions.begin(), all_instructions.end(), 217 target_instructions.begin(), target_instructions.end(), 218 non_target_instructions.begin()); 219 for (Instructions::const_iterator iter = non_target_instructions.begin(); 220 iter != non_target_instructions.end(); 221 ++iter) { 222 SANDBOX_ASSERT(branch_targets.find(*iter) == end); 223 } 224 } 225 226 SANDBOX_TEST(CodeGen, FindBranchTargets) { 227 ForAllPrograms(FindBranchTargets); 228 } 229 230 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen, 231 Instruction *prg, int) { 232 BranchTargets branch_targets; 233 codegen->FindBranchTargets(*prg, &branch_targets); 234 TargetsToBlocks all_blocks; 235 BasicBlock *first_block = 236 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); 237 SANDBOX_ASSERT(first_block != NULL); 238 SANDBOX_ASSERT(first_block->instructions.size() > 0); 239 Instruction *first_insn = first_block->instructions[0]; 240 241 // Basic blocks are supposed to start with a branch target and end with 242 // either a jump or a return instruction. It can also end, if the next 243 // instruction forms the beginning of a new basic block. There should be 244 // no other jumps or return instructions in the middle of a basic block. 245 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin(); 246 bb_iter != all_blocks.end(); 247 ++bb_iter) { 248 BasicBlock *bb = bb_iter->second; 249 SANDBOX_ASSERT(bb != NULL); 250 SANDBOX_ASSERT(bb->instructions.size() > 0); 251 Instruction *insn = bb->instructions[0]; 252 SANDBOX_ASSERT(insn == first_insn || 253 branch_targets.find(insn) != branch_targets.end()); 254 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){ 255 insn = *insn_iter; 256 if (++insn_iter != bb->instructions.end()) { 257 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP); 258 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET); 259 } else { 260 SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP || 261 BPF_CLASS(insn->code) == BPF_RET || 262 branch_targets.find(insn->next) != 263 branch_targets.end()); 264 break; 265 } 266 SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end()); 267 } 268 } 269 } 270 271 SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) { 272 ForAllPrograms(CutGraphIntoBasicBlocks); 273 } 274 275 void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg, 276 int flags) { 277 BranchTargets branch_targets; 278 codegen->FindBranchTargets(*prg, &branch_targets); 279 TargetsToBlocks all_blocks; 280 BasicBlock *first_block = 281 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); 282 283 // The shape of our graph and thus the function of our program should 284 // still be unchanged after we run MergeTails(). We verify this by 285 // serializing the graph and verifying that it is still the same. 286 // We also verify that at least some of the edges changed because of 287 // tail merging. 288 std::string graph[2]; 289 std::string edges[2]; 290 291 // The loop executes twice. After the first run, we call MergeTails() on 292 // our graph. 293 for (int i = 0;;) { 294 // Traverse the entire program in depth-first order. 295 std::vector<BasicBlock *> stack; 296 for (BasicBlock *bb = first_block;;) { 297 // Serialize the instructions in this basic block. In general, we only 298 // need to serialize "code" and "k"; except for a BPF_JA instruction 299 // where "k" isn't set. 300 // The stream of instructions should be unchanged after MergeTails(). 301 for (Instructions::const_iterator iter = bb->instructions.begin(); 302 iter != bb->instructions.end(); 303 ++iter) { 304 graph[i].append(reinterpret_cast<char *>(&(*iter)->code), 305 sizeof((*iter)->code)); 306 if (BPF_CLASS((*iter)->code) != BPF_JMP || 307 BPF_OP((*iter)->code) != BPF_JA) { 308 graph[i].append(reinterpret_cast<char *>(&(*iter)->k), 309 sizeof((*iter)->k)); 310 } 311 } 312 313 // Also serialize the addresses the basic blocks as we encounter them. 314 // This will change as basic blocks are coalesed by MergeTails(). 315 edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb)); 316 317 // Depth-first traversal of the graph. We only ever need to look at the 318 // very last instruction in the basic block, as that is the only one that 319 // can change code flow. 320 Instruction *insn = bb->instructions.back(); 321 if (BPF_CLASS(insn->code) == BPF_JMP) { 322 // For jump instructions, we need to remember the "false" branch while 323 // traversing the "true" branch. This is not necessary for BPF_JA which 324 // only has a single branch. 325 if (BPF_OP(insn->code) != BPF_JA) { 326 stack.push_back(all_blocks[insn->jf_ptr]); 327 } 328 bb = all_blocks[insn->jt_ptr]; 329 } else if (BPF_CLASS(insn->code) == BPF_RET) { 330 // After a BPF_RET, see if we need to back track. 331 if (stack.empty()) { 332 break; 333 } 334 bb = stack.back(); 335 stack.pop_back(); 336 } else { 337 // For "normal" instructions, just follow to the next basic block. 338 bb = all_blocks[insn->next]; 339 } 340 } 341 342 // Our loop runs exactly two times. 343 if (++i > 1) { 344 break; 345 } 346 codegen->MergeTails(&all_blocks); 347 } 348 SANDBOX_ASSERT(graph[0] == graph[1]); 349 if (flags & HAS_MERGEABLE_TAILS) { 350 SANDBOX_ASSERT(edges[0] != edges[1]); 351 } else { 352 SANDBOX_ASSERT(edges[0] == edges[1]); 353 } 354 } 355 356 SANDBOX_TEST(CodeGen, MergeTails) { 357 ForAllPrograms(MergeTails); 358 } 359 360 void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) { 361 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it 362 // detects a problem. Typically, if anything goes wrong, this looks to the 363 // TopoSort algorithm as if there had been cycles in the input data. 364 // This provides a pretty good unittest. 365 // We hand-crafted the program returned by SampleProgram() to exercise 366 // several of the more interesting code-paths. See comments in 367 // SampleProgram() for details. 368 // In addition to relying on the internal consistency checks in the compiler, 369 // we also serialize the graph and the resulting BPF program and compare 370 // them. With the exception of BPF_JA instructions that might have been 371 // inserted, both instruction streams should be equivalent. 372 // As Compile() modifies the instructions, we have to serialize the graph 373 // before calling Compile(). 374 std::string source; 375 Instructions source_stack; 376 for (const Instruction *insn = prg, *next; insn; insn = next) { 377 if (BPF_CLASS(insn->code) == BPF_JMP) { 378 if (BPF_OP(insn->code) == BPF_JA) { 379 // Do not serialize BPF_JA instructions (see above). 380 next = insn->jt_ptr; 381 continue; 382 } else { 383 source_stack.push_back(insn->jf_ptr); 384 next = insn->jt_ptr; 385 } 386 } else if (BPF_CLASS(insn->code) == BPF_RET) { 387 if (source_stack.empty()) { 388 next = NULL; 389 } else { 390 next = source_stack.back(); 391 source_stack.pop_back(); 392 } 393 } else { 394 next = insn->next; 395 } 396 // Only serialize "code" and "k". That's all the information we need to 397 // compare. The rest of the information is encoded in the order of 398 // instructions. 399 source.append(reinterpret_cast<const char *>(&insn->code), 400 sizeof(insn->code)); 401 source.append(reinterpret_cast<const char *>(&insn->k), 402 sizeof(insn->k)); 403 } 404 405 // Compile the program 406 SandboxUnittestHelper::Program bpf; 407 codegen->Compile(prg, &bpf); 408 409 // Serialize the resulting BPF instructions. 410 std::string assembly; 411 std::vector<int> assembly_stack; 412 for (int idx = 0; idx >= 0;) { 413 SANDBOX_ASSERT(idx < (int)bpf.size()); 414 struct sock_filter& insn = bpf[idx]; 415 if (BPF_CLASS(insn.code) == BPF_JMP) { 416 if (BPF_OP(insn.code) == BPF_JA) { 417 // Do not serialize BPF_JA instructions (see above). 418 idx += insn.k + 1; 419 continue; 420 } else { 421 assembly_stack.push_back(idx + insn.jf + 1); 422 idx += insn.jt + 1; 423 } 424 } else if (BPF_CLASS(insn.code) == BPF_RET) { 425 if (assembly_stack.empty()) { 426 idx = -1; 427 } else { 428 idx = assembly_stack.back(); 429 assembly_stack.pop_back(); 430 } 431 } else { 432 ++idx; 433 } 434 // Serialize the same information that we serialized before compilation. 435 assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code)); 436 assembly.append(reinterpret_cast<char *>(&insn.k), sizeof(insn.k)); 437 } 438 SANDBOX_ASSERT(source == assembly); 439 } 440 441 SANDBOX_TEST(CodeGen, All) { 442 ForAllPrograms(CompileAndCompare); 443 } 444 445 } // namespace playground2 446