Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <algorithm>
      6 #include <set>
      7 #include <vector>
      8 
      9 #include "sandbox/linux/seccomp-bpf/codegen.h"
     10 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
     11 #include "sandbox/linux/tests/unit_tests.h"
     12 
     13 namespace playground2 {
     14 
     15 class SandboxUnittestHelper : public Sandbox {
     16  public:
     17   typedef Sandbox::Program Program;
     18 };
     19 
     20 // We want to access some of the private methods in the code generator. We
     21 // do so by defining a "friend" that makes these methods public for us.
     22 class CodeGenUnittestHelper : public CodeGen {
     23  public:
     24   void FindBranchTargets(const Instruction& instructions,
     25                          BranchTargets *branch_targets) {
     26     CodeGen::FindBranchTargets(instructions, branch_targets);
     27   }
     28 
     29   BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns,
     30                                       const BranchTargets& branch_targets,
     31                                       TargetsToBlocks *blocks) {
     32     return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks);
     33   }
     34 
     35   void MergeTails(TargetsToBlocks *blocks) {
     36     CodeGen::MergeTails(blocks);
     37   }
     38 };
     39 
     40 enum { NO_FLAGS            = 0x0000,
     41        HAS_MERGEABLE_TAILS = 0x0001,
     42 };
     43 
     44 Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) {
     45   // Create the most basic valid BPF program:
     46   //    RET ERR_ALLOWED
     47   *flags = NO_FLAGS;
     48   return codegen->MakeInstruction(BPF_RET+BPF_K,
     49                                   ErrorCode(ErrorCode::ERR_ALLOWED));
     50 }
     51 
     52 Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) {
     53   // Create a program with a single branch:
     54   //    JUMP if eq 42 then $0 else $1
     55   // 0: RET EPERM
     56   // 1: RET ERR_ALLOWED
     57   *flags = NO_FLAGS;
     58   return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
     59          codegen->MakeInstruction(BPF_RET+BPF_K,
     60                                   ErrorCode(EPERM)),
     61          codegen->MakeInstruction(BPF_RET+BPF_K,
     62                                   ErrorCode(ErrorCode::ERR_ALLOWED)));
     63 }
     64 
     65 Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) {
     66   // Create a program with a single branch:
     67   //    JUMP if eq 42 then $0 else $0
     68   // 0: RET ERR_ALLOWED
     69 
     70   // N.B.: As the instructions in both sides of the branch are already
     71   //       the same object, we do not actually have any "mergeable" branches.
     72   //       This needs to be reflected in our choice of "flags".
     73   *flags = NO_FLAGS;
     74 
     75   Instruction *ret =
     76     codegen->MakeInstruction(BPF_RET+BPF_K,
     77                              ErrorCode(ErrorCode::ERR_ALLOWED));
     78   return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret);
     79 }
     80 
     81 Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) {
     82   // Creates a basic BPF program that we'll use to test some of the code:
     83   //    JUMP if eq 42 the $0 else $1     (insn6)
     84   // 0: LD 23                            (insn5)
     85   // 1: JUMP if eq 42 then $2 else $4    (insn4)
     86   // 2: JUMP to $3                       (insn1)
     87   // 3: LD 42                            (insn0)
     88   //    RET ErrorCode(42)                (insn2)
     89   // 4: LD 42                            (insn3)
     90   //    RET ErrorCode(42)                (insn3+)
     91   *flags = HAS_MERGEABLE_TAILS;
     92 
     93   Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42);
     94   SANDBOX_ASSERT(insn0);
     95   SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS);
     96   SANDBOX_ASSERT(insn0->k == 42);
     97   SANDBOX_ASSERT(insn0->next == NULL);
     98 
     99   Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0);
    100   SANDBOX_ASSERT(insn1);
    101   SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA);
    102   SANDBOX_ASSERT(insn1->jt_ptr == insn0);
    103 
    104   Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42));
    105   SANDBOX_ASSERT(insn2);
    106   SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K);
    107   SANDBOX_ASSERT(insn2->next == NULL);
    108 
    109   // We explicitly duplicate instructions so that MergeTails() can coalesce
    110   // them later.
    111   Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42,
    112     codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)));
    113 
    114   Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
    115                                                 insn1, insn3);
    116   SANDBOX_ASSERT(insn4);
    117   SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K);
    118   SANDBOX_ASSERT(insn4->k == 42);
    119   SANDBOX_ASSERT(insn4->jt_ptr == insn1);
    120   SANDBOX_ASSERT(insn4->jf_ptr == insn3);
    121 
    122   codegen->JoinInstructions(insn0, insn2);
    123   SANDBOX_ASSERT(insn0->next == insn2);
    124 
    125   Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
    126                                                 23, insn4);
    127   SANDBOX_ASSERT(insn5);
    128   SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS);
    129   SANDBOX_ASSERT(insn5->k == 23);
    130   SANDBOX_ASSERT(insn5->next == insn4);
    131 
    132   // Force a basic block that ends in neither a jump instruction nor a return
    133   // instruction. It only contains "insn5". This exercises one of the less
    134   // common code paths in the topo-sort algorithm.
    135   // This also gives us a diamond-shaped pattern in our graph, which stresses
    136   // another aspect of the topo-sort algorithm (namely, the ability to
    137   // correctly count the incoming branches for subtrees that are not disjunct).
    138   Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
    139                                                 insn5, insn4);
    140 
    141   return insn6;
    142 }
    143 
    144 void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){
    145   Instruction *(*function_table[])(CodeGen *codegen, int *flags) = {
    146     SampleProgramOneInstruction,
    147     SampleProgramSimpleBranch,
    148     SampleProgramAtypicalBranch,
    149     SampleProgramComplex,
    150   };
    151 
    152   for (size_t i = 0; i < arraysize(function_table); ++i) {
    153     CodeGenUnittestHelper codegen;
    154     int flags = NO_FLAGS;
    155     Instruction *prg = function_table[i](&codegen, &flags);
    156     test(&codegen, prg, flags);
    157   }
    158 }
    159 
    160 void MakeInstruction(CodeGenUnittestHelper *codegen,
    161                      Instruction *program, int) {
    162   // Nothing to do here
    163 }
    164 
    165 SANDBOX_TEST(CodeGen, MakeInstruction) {
    166   ForAllPrograms(MakeInstruction);
    167 }
    168 
    169 void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
    170   BranchTargets branch_targets;
    171   codegen->FindBranchTargets(*prg, &branch_targets);
    172 
    173   // Verifying the general properties that should be true for every
    174   // well-formed BPF program.
    175   // Perform a depth-first traversal of the BPF program an verify that all
    176   // targets of BPF_JMP instructions are represented in the "branch_targets".
    177   // At the same time, compute a set of both the branch targets and all the
    178   // instructions in the program.
    179   std::vector<Instruction *> stack;
    180   std::set<Instruction *> all_instructions;
    181   std::set<Instruction *> target_instructions;
    182   BranchTargets::const_iterator end = branch_targets.end();
    183   for (Instruction *insn = prg;;) {
    184     all_instructions.insert(insn);
    185     if (BPF_CLASS(insn->code) == BPF_JMP) {
    186       target_instructions.insert(insn->jt_ptr);
    187       SANDBOX_ASSERT(insn->jt_ptr != NULL);
    188       SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end);
    189       if (BPF_OP(insn->code) != BPF_JA) {
    190         target_instructions.insert(insn->jf_ptr);
    191         SANDBOX_ASSERT(insn->jf_ptr != NULL);
    192         SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end);
    193         stack.push_back(insn->jf_ptr);
    194       }
    195       insn = insn->jt_ptr;
    196     } else if (BPF_CLASS(insn->code) == BPF_RET) {
    197       SANDBOX_ASSERT(insn->next == NULL);
    198       if (stack.empty()) {
    199         break;
    200       }
    201       insn = stack.back();
    202       stack.pop_back();
    203     } else {
    204       SANDBOX_ASSERT(insn->next != NULL);
    205       insn = insn->next;
    206     }
    207   }
    208   SANDBOX_ASSERT(target_instructions.size() == branch_targets.size());
    209 
    210   // We can now subtract the set of the branch targets from the set of all
    211   // instructions. This gives us a set with the instructions that nobody
    212   // ever jumps to. Verify that they are no included in the
    213   // "branch_targets" that FindBranchTargets() computed for us.
    214   Instructions non_target_instructions(all_instructions.size() -
    215                                        target_instructions.size());
    216   set_difference(all_instructions.begin(), all_instructions.end(),
    217                  target_instructions.begin(), target_instructions.end(),
    218                  non_target_instructions.begin());
    219   for (Instructions::const_iterator iter = non_target_instructions.begin();
    220        iter != non_target_instructions.end();
    221        ++iter) {
    222     SANDBOX_ASSERT(branch_targets.find(*iter) == end);
    223   }
    224 }
    225 
    226 SANDBOX_TEST(CodeGen, FindBranchTargets) {
    227   ForAllPrograms(FindBranchTargets);
    228 }
    229 
    230 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen,
    231                              Instruction *prg, int) {
    232   BranchTargets branch_targets;
    233   codegen->FindBranchTargets(*prg, &branch_targets);
    234   TargetsToBlocks all_blocks;
    235   BasicBlock *first_block =
    236     codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
    237   SANDBOX_ASSERT(first_block != NULL);
    238   SANDBOX_ASSERT(first_block->instructions.size() > 0);
    239   Instruction *first_insn = first_block->instructions[0];
    240 
    241   // Basic blocks are supposed to start with a branch target and end with
    242   // either a jump or a return instruction. It can also end, if the next
    243   // instruction forms the beginning of a new basic block. There should be
    244   // no other jumps or return instructions in the middle of a basic block.
    245   for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin();
    246        bb_iter != all_blocks.end();
    247        ++bb_iter) {
    248     BasicBlock *bb = bb_iter->second;
    249     SANDBOX_ASSERT(bb != NULL);
    250     SANDBOX_ASSERT(bb->instructions.size() > 0);
    251     Instruction *insn = bb->instructions[0];
    252     SANDBOX_ASSERT(insn == first_insn ||
    253                    branch_targets.find(insn) != branch_targets.end());
    254     for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){
    255       insn = *insn_iter;
    256       if (++insn_iter != bb->instructions.end()) {
    257         SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP);
    258         SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET);
    259       } else {
    260         SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP ||
    261                        BPF_CLASS(insn->code) == BPF_RET ||
    262                        branch_targets.find(insn->next) !=
    263                          branch_targets.end());
    264         break;
    265       }
    266       SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end());
    267     }
    268   }
    269 }
    270 
    271 SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) {
    272   ForAllPrograms(CutGraphIntoBasicBlocks);
    273 }
    274 
    275 void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg,
    276                 int flags) {
    277   BranchTargets branch_targets;
    278   codegen->FindBranchTargets(*prg, &branch_targets);
    279   TargetsToBlocks all_blocks;
    280   BasicBlock *first_block =
    281     codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
    282 
    283   // The shape of our graph and thus the function of our program should
    284   // still be unchanged after we run MergeTails(). We verify this by
    285   // serializing the graph and verifying that it is still the same.
    286   // We also verify that at least some of the edges changed because of
    287   // tail merging.
    288   std::string graph[2];
    289   std::string edges[2];
    290 
    291   // The loop executes twice. After the first run, we call MergeTails() on
    292   // our graph.
    293   for (int i = 0;;) {
    294     // Traverse the entire program in depth-first order.
    295     std::vector<BasicBlock *> stack;
    296     for (BasicBlock *bb = first_block;;) {
    297       // Serialize the instructions in this basic block. In general, we only
    298       // need to serialize "code" and "k"; except for a BPF_JA instruction
    299       // where "k" isn't set.
    300       // The stream of instructions should be unchanged after MergeTails().
    301       for (Instructions::const_iterator iter = bb->instructions.begin();
    302            iter != bb->instructions.end();
    303            ++iter) {
    304         graph[i].append(reinterpret_cast<char *>(&(*iter)->code),
    305                         sizeof((*iter)->code));
    306         if (BPF_CLASS((*iter)->code) != BPF_JMP ||
    307             BPF_OP((*iter)->code) != BPF_JA) {
    308           graph[i].append(reinterpret_cast<char *>(&(*iter)->k),
    309                           sizeof((*iter)->k));
    310         }
    311       }
    312 
    313       // Also serialize the addresses the basic blocks as we encounter them.
    314       // This will change as basic blocks are coalesed by MergeTails().
    315       edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb));
    316 
    317       // Depth-first traversal of the graph. We only ever need to look at the
    318       // very last instruction in the basic block, as that is the only one that
    319       // can change code flow.
    320       Instruction *insn = bb->instructions.back();
    321       if (BPF_CLASS(insn->code) == BPF_JMP) {
    322         // For jump instructions, we need to remember the "false" branch while
    323         // traversing the "true" branch. This is not necessary for BPF_JA which
    324         // only has a single branch.
    325         if (BPF_OP(insn->code) != BPF_JA) {
    326           stack.push_back(all_blocks[insn->jf_ptr]);
    327         }
    328         bb = all_blocks[insn->jt_ptr];
    329       } else if (BPF_CLASS(insn->code) == BPF_RET) {
    330         // After a BPF_RET, see if we need to back track.
    331         if (stack.empty()) {
    332           break;
    333         }
    334         bb = stack.back();
    335         stack.pop_back();
    336       } else {
    337         // For "normal" instructions, just follow to the next basic block.
    338         bb = all_blocks[insn->next];
    339       }
    340     }
    341 
    342     // Our loop runs exactly two times.
    343     if (++i > 1) {
    344       break;
    345     }
    346     codegen->MergeTails(&all_blocks);
    347   }
    348   SANDBOX_ASSERT(graph[0] == graph[1]);
    349   if (flags & HAS_MERGEABLE_TAILS) {
    350     SANDBOX_ASSERT(edges[0] != edges[1]);
    351   } else {
    352     SANDBOX_ASSERT(edges[0] == edges[1]);
    353   }
    354 }
    355 
    356 SANDBOX_TEST(CodeGen, MergeTails) {
    357   ForAllPrograms(MergeTails);
    358 }
    359 
    360 void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
    361   // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
    362   // detects a problem. Typically, if anything goes wrong, this looks to the
    363   // TopoSort algorithm as if there had been cycles in the input data.
    364   // This provides a pretty good unittest.
    365   // We hand-crafted the program returned by SampleProgram() to exercise
    366   // several of the more interesting code-paths. See comments in
    367   // SampleProgram() for details.
    368   // In addition to relying on the internal consistency checks in the compiler,
    369   // we also serialize the graph and the resulting BPF program and compare
    370   // them. With the exception of BPF_JA instructions that might have been
    371   // inserted, both instruction streams should be equivalent.
    372   // As Compile() modifies the instructions, we have to serialize the graph
    373   // before calling Compile().
    374   std::string source;
    375   Instructions source_stack;
    376   for (const Instruction *insn = prg, *next; insn; insn = next) {
    377     if (BPF_CLASS(insn->code) == BPF_JMP) {
    378       if (BPF_OP(insn->code) == BPF_JA) {
    379         // Do not serialize BPF_JA instructions (see above).
    380         next = insn->jt_ptr;
    381         continue;
    382       } else {
    383         source_stack.push_back(insn->jf_ptr);
    384         next = insn->jt_ptr;
    385       }
    386     } else if (BPF_CLASS(insn->code) == BPF_RET) {
    387       if (source_stack.empty()) {
    388         next = NULL;
    389       } else {
    390         next = source_stack.back();
    391         source_stack.pop_back();
    392       }
    393     } else {
    394       next = insn->next;
    395     }
    396     // Only serialize "code" and "k". That's all the information we need to
    397     // compare. The rest of the information is encoded in the order of
    398     // instructions.
    399     source.append(reinterpret_cast<const char *>(&insn->code),
    400                   sizeof(insn->code));
    401     source.append(reinterpret_cast<const char *>(&insn->k),
    402                   sizeof(insn->k));
    403   }
    404 
    405   // Compile the program
    406   SandboxUnittestHelper::Program bpf;
    407   codegen->Compile(prg, &bpf);
    408 
    409   // Serialize the resulting BPF instructions.
    410   std::string assembly;
    411   std::vector<int> assembly_stack;
    412   for (int idx = 0; idx >= 0;) {
    413     SANDBOX_ASSERT(idx < (int)bpf.size());
    414     struct sock_filter& insn = bpf[idx];
    415     if (BPF_CLASS(insn.code) == BPF_JMP) {
    416       if (BPF_OP(insn.code) == BPF_JA) {
    417         // Do not serialize BPF_JA instructions (see above).
    418         idx += insn.k + 1;
    419         continue;
    420       } else {
    421         assembly_stack.push_back(idx + insn.jf + 1);
    422         idx += insn.jt + 1;
    423       }
    424     } else if (BPF_CLASS(insn.code) == BPF_RET) {
    425       if (assembly_stack.empty()) {
    426         idx = -1;
    427       } else {
    428         idx = assembly_stack.back();
    429         assembly_stack.pop_back();
    430       }
    431     } else {
    432       ++idx;
    433     }
    434     // Serialize the same information that we serialized before compilation.
    435     assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code));
    436     assembly.append(reinterpret_cast<char *>(&insn.k),    sizeof(insn.k));
    437   }
    438   SANDBOX_ASSERT(source == assembly);
    439 }
    440 
    441 SANDBOX_TEST(CodeGen, All) {
    442   ForAllPrograms(CompileAndCompare);
    443 }
    444 
    445 }  // namespace playground2
    446