1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "sandbox/linux/seccomp-bpf/codegen.h" 6 7 #include <errno.h> 8 #include <linux/filter.h> 9 10 #include <set> 11 #include <string> 12 #include <vector> 13 14 #include "sandbox/linux/seccomp-bpf/basicblock.h" 15 #include "sandbox/linux/seccomp-bpf/errorcode.h" 16 #include "sandbox/linux/seccomp-bpf/instruction.h" 17 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 18 #include "sandbox/linux/tests/unit_tests.h" 19 20 namespace sandbox { 21 22 class SandboxUnittestHelper : public SandboxBPF { 23 public: 24 typedef SandboxBPF::Program Program; 25 }; 26 27 // We want to access some of the private methods in the code generator. We 28 // do so by defining a "friend" that makes these methods public for us. 29 class CodeGenUnittestHelper : public CodeGen { 30 public: 31 void FindBranchTargets(const Instruction& instructions, 32 BranchTargets* branch_targets) { 33 CodeGen::FindBranchTargets(instructions, branch_targets); 34 } 35 36 BasicBlock* CutGraphIntoBasicBlocks(Instruction* insns, 37 const BranchTargets& branch_targets, 38 TargetsToBlocks* blocks) { 39 return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks); 40 } 41 42 void MergeTails(TargetsToBlocks* blocks) { CodeGen::MergeTails(blocks); } 43 }; 44 45 enum { NO_FLAGS = 0x0000, HAS_MERGEABLE_TAILS = 0x0001, }; 46 47 Instruction* SampleProgramOneInstruction(CodeGen* codegen, int* flags) { 48 // Create the most basic valid BPF program: 49 // RET 0 50 *flags = NO_FLAGS; 51 return codegen->MakeInstruction(BPF_RET + BPF_K, 0); 52 } 53 54 Instruction* SampleProgramSimpleBranch(CodeGen* codegen, int* flags) { 55 // Create a program with a single branch: 56 // JUMP if eq 42 then $0 else $1 57 // 0: RET 1 58 // 1: RET 0 59 *flags = NO_FLAGS; 60 return codegen->MakeInstruction( 61 BPF_JMP + BPF_JEQ + BPF_K, 62 42, 63 codegen->MakeInstruction(BPF_RET + BPF_K, 1), 64 codegen->MakeInstruction(BPF_RET + BPF_K, 0)); 65 } 66 67 Instruction* SampleProgramAtypicalBranch(CodeGen* codegen, int* flags) { 68 // Create a program with a single branch: 69 // JUMP if eq 42 then $0 else $0 70 // 0: RET 0 71 72 // N.B.: As the instructions in both sides of the branch are already 73 // the same object, we do not actually have any "mergeable" branches. 74 // This needs to be reflected in our choice of "flags". 75 *flags = NO_FLAGS; 76 77 Instruction* ret = codegen->MakeInstruction( 78 BPF_RET + BPF_K, 0); 79 return codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); 80 } 81 82 Instruction* SampleProgramComplex(CodeGen* codegen, int* flags) { 83 // Creates a basic BPF program that we'll use to test some of the code: 84 // JUMP if eq 42 the $0 else $1 (insn6) 85 // 0: LD 23 (insn5) 86 // 1: JUMP if eq 42 then $2 else $4 (insn4) 87 // 2: JUMP to $3 (insn2) 88 // 3: LD 42 (insn1) 89 // RET 42 (insn0) 90 // 4: LD 42 (insn3) 91 // RET 42 (insn3+) 92 *flags = HAS_MERGEABLE_TAILS; 93 94 Instruction* insn0 = codegen->MakeInstruction(BPF_RET + BPF_K, 42); 95 SANDBOX_ASSERT(insn0); 96 SANDBOX_ASSERT(insn0->code == BPF_RET + BPF_K); 97 SANDBOX_ASSERT(insn0->next == NULL); 98 99 Instruction* insn1 = 100 codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); 101 SANDBOX_ASSERT(insn1); 102 SANDBOX_ASSERT(insn1->code == BPF_LD + BPF_W + BPF_ABS); 103 SANDBOX_ASSERT(insn1->k == 42); 104 SANDBOX_ASSERT(insn1->next == insn0); 105 106 Instruction* insn2 = codegen->MakeInstruction(BPF_JMP + BPF_JA, 0, insn1); 107 SANDBOX_ASSERT(insn2); 108 SANDBOX_ASSERT(insn2->code == BPF_JMP + BPF_JA); 109 SANDBOX_ASSERT(insn2->jt_ptr == insn1); 110 111 // We explicitly duplicate instructions so that MergeTails() can coalesce 112 // them later. 113 Instruction* insn3 = codegen->MakeInstruction( 114 BPF_LD + BPF_W + BPF_ABS, 115 42, 116 codegen->MakeInstruction(BPF_RET + BPF_K, 42)); 117 118 Instruction* insn4 = 119 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); 120 SANDBOX_ASSERT(insn4); 121 SANDBOX_ASSERT(insn4->code == BPF_JMP + BPF_JEQ + BPF_K); 122 SANDBOX_ASSERT(insn4->k == 42); 123 SANDBOX_ASSERT(insn4->jt_ptr == insn2); 124 SANDBOX_ASSERT(insn4->jf_ptr == insn3); 125 126 Instruction* insn5 = 127 codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); 128 SANDBOX_ASSERT(insn5); 129 SANDBOX_ASSERT(insn5->code == BPF_LD + BPF_W + BPF_ABS); 130 SANDBOX_ASSERT(insn5->k == 23); 131 SANDBOX_ASSERT(insn5->next == insn4); 132 133 // Force a basic block that ends in neither a jump instruction nor a return 134 // instruction. It only contains "insn5". This exercises one of the less 135 // common code paths in the topo-sort algorithm. 136 // This also gives us a diamond-shaped pattern in our graph, which stresses 137 // another aspect of the topo-sort algorithm (namely, the ability to 138 // correctly count the incoming branches for subtrees that are not disjunct). 139 Instruction* insn6 = 140 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); 141 142 return insn6; 143 } 144 145 Instruction* SampleProgramConfusingTails(CodeGen* codegen, int* flags) { 146 // This simple program demonstrates https://crbug.com/351103/ 147 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could 148 // be tempted to merge them since they are the same. However, they are 149 // not mergeable because they fall-through to non semantically equivalent 150 // blocks. 151 // Without the fix for this bug, this program should trigger the check in 152 // CompileAndCompare: the serialized graphs from the program and its compiled 153 // version will differ. 154 // 155 // 0) LOAD 1 // ??? 156 // 1) if A == 0x1; then JMP 2 else JMP 3 157 // 2) LOAD 0 // System call number 158 // 3) if A == 0x2; then JMP 4 else JMP 5 159 // 4) LOAD 0 // System call number 160 // 5) if A == 0x1; then JMP 6 else JMP 7 161 // 6) RET 0 162 // 7) RET 1 163 *flags = NO_FLAGS; 164 165 Instruction* i7 = codegen->MakeInstruction(BPF_RET + BPF_K, 1); 166 Instruction* i6 = codegen->MakeInstruction(BPF_RET + BPF_K, 0); 167 Instruction* i5 = 168 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); 169 Instruction* i4 = codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); 170 Instruction* i3 = 171 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); 172 Instruction* i2 = codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); 173 Instruction* i1 = 174 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); 175 Instruction* i0 = codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); 176 177 return i0; 178 } 179 180 Instruction* SampleProgramConfusingTailsBasic(CodeGen* codegen, int* flags) { 181 // Without the fix for https://crbug.com/351103/, (see 182 // SampleProgramConfusingTails()), this would generate a cyclic graph and 183 // crash as the two "LOAD 0" instructions would get merged. 184 // 185 // 0) LOAD 1 // ??? 186 // 1) if A == 0x1; then JMP 2 else JMP 3 187 // 2) LOAD 0 // System call number 188 // 3) if A == 0x2; then JMP 4 else JMP 5 189 // 4) LOAD 0 // System call number 190 // 5) RET 1 191 *flags = NO_FLAGS; 192 193 Instruction* i5 = codegen->MakeInstruction(BPF_RET + BPF_K, 1); 194 Instruction* i4 = codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); 195 Instruction* i3 = 196 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); 197 Instruction* i2 = codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); 198 Instruction* i1 = 199 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); 200 Instruction* i0 = codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); 201 202 return i0; 203 } 204 205 Instruction* SampleProgramConfusingTailsMergeable(CodeGen* codegen, 206 int* flags) { 207 // This is similar to SampleProgramConfusingTails(), except that 208 // instructions 2 and 4 are now RET instructions. 209 // In PointerCompare(), this exercises the path where two blocks are of the 210 // same length and identical and the last instruction is a JMP or RET, so the 211 // following blocks don't need to be looked at and the blocks are mergeable. 212 // 213 // 0) LOAD 1 // ??? 214 // 1) if A == 0x1; then JMP 2 else JMP 3 215 // 2) RET 42 216 // 3) if A == 0x2; then JMP 4 else JMP 5 217 // 4) RET 42 218 // 5) if A == 0x1; then JMP 6 else JMP 7 219 // 6) RET 0 220 // 7) RET 1 221 *flags = HAS_MERGEABLE_TAILS; 222 223 Instruction* i7 = codegen->MakeInstruction(BPF_RET + BPF_K, 1); 224 Instruction* i6 = codegen->MakeInstruction(BPF_RET + BPF_K, 0); 225 Instruction* i5 = 226 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); 227 Instruction* i4 = codegen->MakeInstruction(BPF_RET + BPF_K, 42); 228 Instruction* i3 = 229 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); 230 Instruction* i2 = codegen->MakeInstruction(BPF_RET + BPF_K, 42); 231 Instruction* i1 = 232 codegen->MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); 233 Instruction* i0 = codegen->MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); 234 235 return i0; 236 } 237 void ForAllPrograms(void (*test)(CodeGenUnittestHelper*, Instruction*, int)) { 238 Instruction* (*function_table[])(CodeGen* codegen, int* flags) = { 239 SampleProgramOneInstruction, 240 SampleProgramSimpleBranch, 241 SampleProgramAtypicalBranch, 242 SampleProgramComplex, 243 SampleProgramConfusingTails, 244 SampleProgramConfusingTailsBasic, 245 SampleProgramConfusingTailsMergeable, 246 }; 247 248 for (size_t i = 0; i < arraysize(function_table); ++i) { 249 CodeGenUnittestHelper codegen; 250 int flags = NO_FLAGS; 251 Instruction *prg = function_table[i](&codegen, &flags); 252 test(&codegen, prg, flags); 253 } 254 } 255 256 void MakeInstruction(CodeGenUnittestHelper* codegen, 257 Instruction* program, int) { 258 // Nothing to do here 259 } 260 261 SANDBOX_TEST(CodeGen, MakeInstruction) { 262 ForAllPrograms(MakeInstruction); 263 } 264 265 void FindBranchTargets(CodeGenUnittestHelper* codegen, Instruction* prg, int) { 266 BranchTargets branch_targets; 267 codegen->FindBranchTargets(*prg, &branch_targets); 268 269 // Verifying the general properties that should be true for every 270 // well-formed BPF program. 271 // Perform a depth-first traversal of the BPF program an verify that all 272 // targets of BPF_JMP instructions are represented in the "branch_targets". 273 // At the same time, compute a set of both the branch targets and all the 274 // instructions in the program. 275 std::vector<Instruction*> stack; 276 std::set<Instruction*> all_instructions; 277 std::set<Instruction*> target_instructions; 278 BranchTargets::const_iterator end = branch_targets.end(); 279 for (Instruction* insn = prg;;) { 280 all_instructions.insert(insn); 281 if (BPF_CLASS(insn->code) == BPF_JMP) { 282 target_instructions.insert(insn->jt_ptr); 283 SANDBOX_ASSERT(insn->jt_ptr != NULL); 284 SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end); 285 if (BPF_OP(insn->code) != BPF_JA) { 286 target_instructions.insert(insn->jf_ptr); 287 SANDBOX_ASSERT(insn->jf_ptr != NULL); 288 SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end); 289 stack.push_back(insn->jf_ptr); 290 } 291 insn = insn->jt_ptr; 292 } else if (BPF_CLASS(insn->code) == BPF_RET) { 293 SANDBOX_ASSERT(insn->next == NULL); 294 if (stack.empty()) { 295 break; 296 } 297 insn = stack.back(); 298 stack.pop_back(); 299 } else { 300 SANDBOX_ASSERT(insn->next != NULL); 301 insn = insn->next; 302 } 303 } 304 SANDBOX_ASSERT(target_instructions.size() == branch_targets.size()); 305 306 // We can now subtract the set of the branch targets from the set of all 307 // instructions. This gives us a set with the instructions that nobody 308 // ever jumps to. Verify that they are no included in the 309 // "branch_targets" that FindBranchTargets() computed for us. 310 Instructions non_target_instructions(all_instructions.size() - 311 target_instructions.size()); 312 set_difference(all_instructions.begin(), 313 all_instructions.end(), 314 target_instructions.begin(), 315 target_instructions.end(), 316 non_target_instructions.begin()); 317 for (Instructions::const_iterator iter = non_target_instructions.begin(); 318 iter != non_target_instructions.end(); 319 ++iter) { 320 SANDBOX_ASSERT(branch_targets.find(*iter) == end); 321 } 322 } 323 324 SANDBOX_TEST(CodeGen, FindBranchTargets) { ForAllPrograms(FindBranchTargets); } 325 326 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper* codegen, 327 Instruction* prg, 328 int) { 329 BranchTargets branch_targets; 330 codegen->FindBranchTargets(*prg, &branch_targets); 331 TargetsToBlocks all_blocks; 332 BasicBlock* first_block = 333 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); 334 SANDBOX_ASSERT(first_block != NULL); 335 SANDBOX_ASSERT(first_block->instructions.size() > 0); 336 Instruction* first_insn = first_block->instructions[0]; 337 338 // Basic blocks are supposed to start with a branch target and end with 339 // either a jump or a return instruction. It can also end, if the next 340 // instruction forms the beginning of a new basic block. There should be 341 // no other jumps or return instructions in the middle of a basic block. 342 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin(); 343 bb_iter != all_blocks.end(); 344 ++bb_iter) { 345 BasicBlock* bb = bb_iter->second; 346 SANDBOX_ASSERT(bb != NULL); 347 SANDBOX_ASSERT(bb->instructions.size() > 0); 348 Instruction* insn = bb->instructions[0]; 349 SANDBOX_ASSERT(insn == first_insn || 350 branch_targets.find(insn) != branch_targets.end()); 351 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;) { 352 insn = *insn_iter; 353 if (++insn_iter != bb->instructions.end()) { 354 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP); 355 SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET); 356 } else { 357 SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP || 358 BPF_CLASS(insn->code) == BPF_RET || 359 branch_targets.find(insn->next) != branch_targets.end()); 360 break; 361 } 362 SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end()); 363 } 364 } 365 } 366 367 SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) { 368 ForAllPrograms(CutGraphIntoBasicBlocks); 369 } 370 371 void MergeTails(CodeGenUnittestHelper* codegen, Instruction* prg, int flags) { 372 BranchTargets branch_targets; 373 codegen->FindBranchTargets(*prg, &branch_targets); 374 TargetsToBlocks all_blocks; 375 BasicBlock* first_block = 376 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); 377 378 // The shape of our graph and thus the function of our program should 379 // still be unchanged after we run MergeTails(). We verify this by 380 // serializing the graph and verifying that it is still the same. 381 // We also verify that at least some of the edges changed because of 382 // tail merging. 383 std::string graph[2]; 384 std::string edges[2]; 385 386 // The loop executes twice. After the first run, we call MergeTails() on 387 // our graph. 388 for (int i = 0;;) { 389 // Traverse the entire program in depth-first order. 390 std::vector<BasicBlock*> stack; 391 for (BasicBlock* bb = first_block;;) { 392 // Serialize the instructions in this basic block. In general, we only 393 // need to serialize "code" and "k"; except for a BPF_JA instruction 394 // where "k" isn't set. 395 // The stream of instructions should be unchanged after MergeTails(). 396 for (Instructions::const_iterator iter = bb->instructions.begin(); 397 iter != bb->instructions.end(); 398 ++iter) { 399 graph[i].append(reinterpret_cast<char*>(&(*iter)->code), 400 sizeof((*iter)->code)); 401 if (BPF_CLASS((*iter)->code) != BPF_JMP || 402 BPF_OP((*iter)->code) != BPF_JA) { 403 graph[i].append(reinterpret_cast<char*>(&(*iter)->k), 404 sizeof((*iter)->k)); 405 } 406 } 407 408 // Also serialize the addresses the basic blocks as we encounter them. 409 // This will change as basic blocks are coalesed by MergeTails(). 410 edges[i].append(reinterpret_cast<char*>(&bb), sizeof(bb)); 411 412 // Depth-first traversal of the graph. We only ever need to look at the 413 // very last instruction in the basic block, as that is the only one that 414 // can change code flow. 415 Instruction* insn = bb->instructions.back(); 416 if (BPF_CLASS(insn->code) == BPF_JMP) { 417 // For jump instructions, we need to remember the "false" branch while 418 // traversing the "true" branch. This is not necessary for BPF_JA which 419 // only has a single branch. 420 if (BPF_OP(insn->code) != BPF_JA) { 421 stack.push_back(all_blocks[insn->jf_ptr]); 422 } 423 bb = all_blocks[insn->jt_ptr]; 424 } else if (BPF_CLASS(insn->code) == BPF_RET) { 425 // After a BPF_RET, see if we need to back track. 426 if (stack.empty()) { 427 break; 428 } 429 bb = stack.back(); 430 stack.pop_back(); 431 } else { 432 // For "normal" instructions, just follow to the next basic block. 433 bb = all_blocks[insn->next]; 434 } 435 } 436 437 // Our loop runs exactly two times. 438 if (++i > 1) { 439 break; 440 } 441 codegen->MergeTails(&all_blocks); 442 } 443 SANDBOX_ASSERT(graph[0] == graph[1]); 444 if (flags & HAS_MERGEABLE_TAILS) { 445 SANDBOX_ASSERT(edges[0] != edges[1]); 446 } else { 447 SANDBOX_ASSERT(edges[0] == edges[1]); 448 } 449 } 450 451 SANDBOX_TEST(CodeGen, MergeTails) { 452 ForAllPrograms(MergeTails); 453 } 454 455 void CompileAndCompare(CodeGenUnittestHelper* codegen, Instruction* prg, int) { 456 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it 457 // detects a problem. Typically, if anything goes wrong, this looks to the 458 // TopoSort algorithm as if there had been cycles in the input data. 459 // This provides a pretty good unittest. 460 // We hand-crafted the program returned by SampleProgram() to exercise 461 // several of the more interesting code-paths. See comments in 462 // SampleProgram() for details. 463 // In addition to relying on the internal consistency checks in the compiler, 464 // we also serialize the graph and the resulting BPF program and compare 465 // them. With the exception of BPF_JA instructions that might have been 466 // inserted, both instruction streams should be equivalent. 467 // As Compile() modifies the instructions, we have to serialize the graph 468 // before calling Compile(). 469 std::string source; 470 Instructions source_stack; 471 for (const Instruction* insn = prg, *next; insn; insn = next) { 472 if (BPF_CLASS(insn->code) == BPF_JMP) { 473 if (BPF_OP(insn->code) == BPF_JA) { 474 // Do not serialize BPF_JA instructions (see above). 475 next = insn->jt_ptr; 476 continue; 477 } else { 478 source_stack.push_back(insn->jf_ptr); 479 next = insn->jt_ptr; 480 } 481 } else if (BPF_CLASS(insn->code) == BPF_RET) { 482 if (source_stack.empty()) { 483 next = NULL; 484 } else { 485 next = source_stack.back(); 486 source_stack.pop_back(); 487 } 488 } else { 489 next = insn->next; 490 } 491 // Only serialize "code" and "k". That's all the information we need to 492 // compare. The rest of the information is encoded in the order of 493 // instructions. 494 source.append(reinterpret_cast<const char*>(&insn->code), 495 sizeof(insn->code)); 496 source.append(reinterpret_cast<const char*>(&insn->k), sizeof(insn->k)); 497 } 498 499 // Compile the program 500 SandboxUnittestHelper::Program bpf; 501 codegen->Compile(prg, &bpf); 502 503 // Serialize the resulting BPF instructions. 504 std::string assembly; 505 std::vector<int> assembly_stack; 506 for (int idx = 0; idx >= 0;) { 507 SANDBOX_ASSERT(idx < (int)bpf.size()); 508 struct sock_filter& insn = bpf[idx]; 509 if (BPF_CLASS(insn.code) == BPF_JMP) { 510 if (BPF_OP(insn.code) == BPF_JA) { 511 // Do not serialize BPF_JA instructions (see above). 512 idx += insn.k + 1; 513 continue; 514 } else { 515 assembly_stack.push_back(idx + insn.jf + 1); 516 idx += insn.jt + 1; 517 } 518 } else if (BPF_CLASS(insn.code) == BPF_RET) { 519 if (assembly_stack.empty()) { 520 idx = -1; 521 } else { 522 idx = assembly_stack.back(); 523 assembly_stack.pop_back(); 524 } 525 } else { 526 ++idx; 527 } 528 // Serialize the same information that we serialized before compilation. 529 assembly.append(reinterpret_cast<char*>(&insn.code), sizeof(insn.code)); 530 assembly.append(reinterpret_cast<char*>(&insn.k), sizeof(insn.k)); 531 } 532 SANDBOX_ASSERT(source == assembly); 533 } 534 535 SANDBOX_TEST(CodeGen, All) { 536 ForAllPrograms(CompileAndCompare); 537 } 538 539 } // namespace sandbox 540