Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <errno.h>
      6 
      7 #include <algorithm>
      8 #include <set>
      9 #include <vector>
     10 
     11 #include "sandbox/linux/seccomp-bpf/codegen.h"
     12 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
     13 #include "sandbox/linux/tests/unit_tests.h"
     14 
     15 namespace sandbox {
     16 
     17 class SandboxUnittestHelper : public SandboxBPF {
     18  public:
     19   typedef SandboxBPF::Program Program;
     20 };
     21 
     22 // We want to access some of the private methods in the code generator. We
     23 // do so by defining a "friend" that makes these methods public for us.
     24 class CodeGenUnittestHelper : public CodeGen {
     25  public:
     26   void FindBranchTargets(const Instruction& instructions,
     27                          BranchTargets *branch_targets) {
     28     CodeGen::FindBranchTargets(instructions, branch_targets);
     29   }
     30 
     31   BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns,
     32                                       const BranchTargets& branch_targets,
     33                                       TargetsToBlocks *blocks) {
     34     return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks);
     35   }
     36 
     37   void MergeTails(TargetsToBlocks *blocks) {
     38     CodeGen::MergeTails(blocks);
     39   }
     40 };
     41 
     42 enum { NO_FLAGS            = 0x0000,
     43        HAS_MERGEABLE_TAILS = 0x0001,
     44 };
     45 
     46 Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) {
     47   // Create the most basic valid BPF program:
     48   //    RET ERR_ALLOWED
     49   *flags = NO_FLAGS;
     50   return codegen->MakeInstruction(BPF_RET+BPF_K,
     51                                   ErrorCode(ErrorCode::ERR_ALLOWED));
     52 }
     53 
     54 Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) {
     55   // Create a program with a single branch:
     56   //    JUMP if eq 42 then $0 else $1
     57   // 0: RET EPERM
     58   // 1: RET ERR_ALLOWED
     59   *flags = NO_FLAGS;
     60   return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
     61          codegen->MakeInstruction(BPF_RET+BPF_K,
     62                                   ErrorCode(EPERM)),
     63          codegen->MakeInstruction(BPF_RET+BPF_K,
     64                                   ErrorCode(ErrorCode::ERR_ALLOWED)));
     65 }
     66 
     67 Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) {
     68   // Create a program with a single branch:
     69   //    JUMP if eq 42 then $0 else $0
     70   // 0: RET ERR_ALLOWED
     71 
     72   // N.B.: As the instructions in both sides of the branch are already
     73   //       the same object, we do not actually have any "mergeable" branches.
     74   //       This needs to be reflected in our choice of "flags".
     75   *flags = NO_FLAGS;
     76 
     77   Instruction *ret =
     78     codegen->MakeInstruction(BPF_RET+BPF_K,
     79                              ErrorCode(ErrorCode::ERR_ALLOWED));
     80   return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret);
     81 }
     82 
     83 Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) {
     84   // Creates a basic BPF program that we'll use to test some of the code:
     85   //    JUMP if eq 42 the $0 else $1     (insn6)
     86   // 0: LD 23                            (insn5)
     87   // 1: JUMP if eq 42 then $2 else $4    (insn4)
     88   // 2: JUMP to $3                       (insn1)
     89   // 3: LD 42                            (insn0)
     90   //    RET ErrorCode(42)                (insn2)
     91   // 4: LD 42                            (insn3)
     92   //    RET ErrorCode(42)                (insn3+)
     93   *flags = HAS_MERGEABLE_TAILS;
     94 
     95   Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42);
     96   SANDBOX_ASSERT(insn0);
     97   SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS);
     98   SANDBOX_ASSERT(insn0->k == 42);
     99   SANDBOX_ASSERT(insn0->next == NULL);
    100 
    101   Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0);
    102   SANDBOX_ASSERT(insn1);
    103   SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA);
    104   SANDBOX_ASSERT(insn1->jt_ptr == insn0);
    105 
    106   Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42));
    107   SANDBOX_ASSERT(insn2);
    108   SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K);
    109   SANDBOX_ASSERT(insn2->next == NULL);
    110 
    111   // We explicitly duplicate instructions so that MergeTails() can coalesce
    112   // them later.
    113   Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42,
    114     codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)));
    115 
    116   Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
    117                                                 insn1, insn3);
    118   SANDBOX_ASSERT(insn4);
    119   SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K);
    120   SANDBOX_ASSERT(insn4->k == 42);
    121   SANDBOX_ASSERT(insn4->jt_ptr == insn1);
    122   SANDBOX_ASSERT(insn4->jf_ptr == insn3);
    123 
    124   codegen->JoinInstructions(insn0, insn2);
    125   SANDBOX_ASSERT(insn0->next == insn2);
    126 
    127   Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
    128                                                 23, insn4);
    129   SANDBOX_ASSERT(insn5);
    130   SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS);
    131   SANDBOX_ASSERT(insn5->k == 23);
    132   SANDBOX_ASSERT(insn5->next == insn4);
    133 
    134   // Force a basic block that ends in neither a jump instruction nor a return
    135   // instruction. It only contains "insn5". This exercises one of the less
    136   // common code paths in the topo-sort algorithm.
    137   // This also gives us a diamond-shaped pattern in our graph, which stresses
    138   // another aspect of the topo-sort algorithm (namely, the ability to
    139   // correctly count the incoming branches for subtrees that are not disjunct).
    140   Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
    141                                                 insn5, insn4);
    142 
    143   return insn6;
    144 }
    145 
    146 void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){
    147   Instruction *(*function_table[])(CodeGen *codegen, int *flags) = {
    148     SampleProgramOneInstruction,
    149     SampleProgramSimpleBranch,
    150     SampleProgramAtypicalBranch,
    151     SampleProgramComplex,
    152   };
    153 
    154   for (size_t i = 0; i < arraysize(function_table); ++i) {
    155     CodeGenUnittestHelper codegen;
    156     int flags = NO_FLAGS;
    157     Instruction *prg = function_table[i](&codegen, &flags);
    158     test(&codegen, prg, flags);
    159   }
    160 }
    161 
    162 void MakeInstruction(CodeGenUnittestHelper *codegen,
    163                      Instruction *program, int) {
    164   // Nothing to do here
    165 }
    166 
    167 SANDBOX_TEST(CodeGen, MakeInstruction) {
    168   ForAllPrograms(MakeInstruction);
    169 }
    170 
    171 void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
    172   BranchTargets branch_targets;
    173   codegen->FindBranchTargets(*prg, &branch_targets);
    174 
    175   // Verifying the general properties that should be true for every
    176   // well-formed BPF program.
    177   // Perform a depth-first traversal of the BPF program an verify that all
    178   // targets of BPF_JMP instructions are represented in the "branch_targets".
    179   // At the same time, compute a set of both the branch targets and all the
    180   // instructions in the program.
    181   std::vector<Instruction *> stack;
    182   std::set<Instruction *> all_instructions;
    183   std::set<Instruction *> target_instructions;
    184   BranchTargets::const_iterator end = branch_targets.end();
    185   for (Instruction *insn = prg;;) {
    186     all_instructions.insert(insn);
    187     if (BPF_CLASS(insn->code) == BPF_JMP) {
    188       target_instructions.insert(insn->jt_ptr);
    189       SANDBOX_ASSERT(insn->jt_ptr != NULL);
    190       SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end);
    191       if (BPF_OP(insn->code) != BPF_JA) {
    192         target_instructions.insert(insn->jf_ptr);
    193         SANDBOX_ASSERT(insn->jf_ptr != NULL);
    194         SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end);
    195         stack.push_back(insn->jf_ptr);
    196       }
    197       insn = insn->jt_ptr;
    198     } else if (BPF_CLASS(insn->code) == BPF_RET) {
    199       SANDBOX_ASSERT(insn->next == NULL);
    200       if (stack.empty()) {
    201         break;
    202       }
    203       insn = stack.back();
    204       stack.pop_back();
    205     } else {
    206       SANDBOX_ASSERT(insn->next != NULL);
    207       insn = insn->next;
    208     }
    209   }
    210   SANDBOX_ASSERT(target_instructions.size() == branch_targets.size());
    211 
    212   // We can now subtract the set of the branch targets from the set of all
    213   // instructions. This gives us a set with the instructions that nobody
    214   // ever jumps to. Verify that they are no included in the
    215   // "branch_targets" that FindBranchTargets() computed for us.
    216   Instructions non_target_instructions(all_instructions.size() -
    217                                        target_instructions.size());
    218   set_difference(all_instructions.begin(), all_instructions.end(),
    219                  target_instructions.begin(), target_instructions.end(),
    220                  non_target_instructions.begin());
    221   for (Instructions::const_iterator iter = non_target_instructions.begin();
    222        iter != non_target_instructions.end();
    223        ++iter) {
    224     SANDBOX_ASSERT(branch_targets.find(*iter) == end);
    225   }
    226 }
    227 
    228 SANDBOX_TEST(CodeGen, FindBranchTargets) {
    229   ForAllPrograms(FindBranchTargets);
    230 }
    231 
    232 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen,
    233                              Instruction *prg, int) {
    234   BranchTargets branch_targets;
    235   codegen->FindBranchTargets(*prg, &branch_targets);
    236   TargetsToBlocks all_blocks;
    237   BasicBlock *first_block =
    238     codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
    239   SANDBOX_ASSERT(first_block != NULL);
    240   SANDBOX_ASSERT(first_block->instructions.size() > 0);
    241   Instruction *first_insn = first_block->instructions[0];
    242 
    243   // Basic blocks are supposed to start with a branch target and end with
    244   // either a jump or a return instruction. It can also end, if the next
    245   // instruction forms the beginning of a new basic block. There should be
    246   // no other jumps or return instructions in the middle of a basic block.
    247   for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin();
    248        bb_iter != all_blocks.end();
    249        ++bb_iter) {
    250     BasicBlock *bb = bb_iter->second;
    251     SANDBOX_ASSERT(bb != NULL);
    252     SANDBOX_ASSERT(bb->instructions.size() > 0);
    253     Instruction *insn = bb->instructions[0];
    254     SANDBOX_ASSERT(insn == first_insn ||
    255                    branch_targets.find(insn) != branch_targets.end());
    256     for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){
    257       insn = *insn_iter;
    258       if (++insn_iter != bb->instructions.end()) {
    259         SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP);
    260         SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET);
    261       } else {
    262         SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP ||
    263                        BPF_CLASS(insn->code) == BPF_RET ||
    264                        branch_targets.find(insn->next) !=
    265                          branch_targets.end());
    266         break;
    267       }
    268       SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end());
    269     }
    270   }
    271 }
    272 
    273 SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) {
    274   ForAllPrograms(CutGraphIntoBasicBlocks);
    275 }
    276 
    277 void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg,
    278                 int flags) {
    279   BranchTargets branch_targets;
    280   codegen->FindBranchTargets(*prg, &branch_targets);
    281   TargetsToBlocks all_blocks;
    282   BasicBlock *first_block =
    283     codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
    284 
    285   // The shape of our graph and thus the function of our program should
    286   // still be unchanged after we run MergeTails(). We verify this by
    287   // serializing the graph and verifying that it is still the same.
    288   // We also verify that at least some of the edges changed because of
    289   // tail merging.
    290   std::string graph[2];
    291   std::string edges[2];
    292 
    293   // The loop executes twice. After the first run, we call MergeTails() on
    294   // our graph.
    295   for (int i = 0;;) {
    296     // Traverse the entire program in depth-first order.
    297     std::vector<BasicBlock *> stack;
    298     for (BasicBlock *bb = first_block;;) {
    299       // Serialize the instructions in this basic block. In general, we only
    300       // need to serialize "code" and "k"; except for a BPF_JA instruction
    301       // where "k" isn't set.
    302       // The stream of instructions should be unchanged after MergeTails().
    303       for (Instructions::const_iterator iter = bb->instructions.begin();
    304            iter != bb->instructions.end();
    305            ++iter) {
    306         graph[i].append(reinterpret_cast<char *>(&(*iter)->code),
    307                         sizeof((*iter)->code));
    308         if (BPF_CLASS((*iter)->code) != BPF_JMP ||
    309             BPF_OP((*iter)->code) != BPF_JA) {
    310           graph[i].append(reinterpret_cast<char *>(&(*iter)->k),
    311                           sizeof((*iter)->k));
    312         }
    313       }
    314 
    315       // Also serialize the addresses the basic blocks as we encounter them.
    316       // This will change as basic blocks are coalesed by MergeTails().
    317       edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb));
    318 
    319       // Depth-first traversal of the graph. We only ever need to look at the
    320       // very last instruction in the basic block, as that is the only one that
    321       // can change code flow.
    322       Instruction *insn = bb->instructions.back();
    323       if (BPF_CLASS(insn->code) == BPF_JMP) {
    324         // For jump instructions, we need to remember the "false" branch while
    325         // traversing the "true" branch. This is not necessary for BPF_JA which
    326         // only has a single branch.
    327         if (BPF_OP(insn->code) != BPF_JA) {
    328           stack.push_back(all_blocks[insn->jf_ptr]);
    329         }
    330         bb = all_blocks[insn->jt_ptr];
    331       } else if (BPF_CLASS(insn->code) == BPF_RET) {
    332         // After a BPF_RET, see if we need to back track.
    333         if (stack.empty()) {
    334           break;
    335         }
    336         bb = stack.back();
    337         stack.pop_back();
    338       } else {
    339         // For "normal" instructions, just follow to the next basic block.
    340         bb = all_blocks[insn->next];
    341       }
    342     }
    343 
    344     // Our loop runs exactly two times.
    345     if (++i > 1) {
    346       break;
    347     }
    348     codegen->MergeTails(&all_blocks);
    349   }
    350   SANDBOX_ASSERT(graph[0] == graph[1]);
    351   if (flags & HAS_MERGEABLE_TAILS) {
    352     SANDBOX_ASSERT(edges[0] != edges[1]);
    353   } else {
    354     SANDBOX_ASSERT(edges[0] == edges[1]);
    355   }
    356 }
    357 
    358 SANDBOX_TEST(CodeGen, MergeTails) {
    359   ForAllPrograms(MergeTails);
    360 }
    361 
    362 void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
    363   // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
    364   // detects a problem. Typically, if anything goes wrong, this looks to the
    365   // TopoSort algorithm as if there had been cycles in the input data.
    366   // This provides a pretty good unittest.
    367   // We hand-crafted the program returned by SampleProgram() to exercise
    368   // several of the more interesting code-paths. See comments in
    369   // SampleProgram() for details.
    370   // In addition to relying on the internal consistency checks in the compiler,
    371   // we also serialize the graph and the resulting BPF program and compare
    372   // them. With the exception of BPF_JA instructions that might have been
    373   // inserted, both instruction streams should be equivalent.
    374   // As Compile() modifies the instructions, we have to serialize the graph
    375   // before calling Compile().
    376   std::string source;
    377   Instructions source_stack;
    378   for (const Instruction *insn = prg, *next; insn; insn = next) {
    379     if (BPF_CLASS(insn->code) == BPF_JMP) {
    380       if (BPF_OP(insn->code) == BPF_JA) {
    381         // Do not serialize BPF_JA instructions (see above).
    382         next = insn->jt_ptr;
    383         continue;
    384       } else {
    385         source_stack.push_back(insn->jf_ptr);
    386         next = insn->jt_ptr;
    387       }
    388     } else if (BPF_CLASS(insn->code) == BPF_RET) {
    389       if (source_stack.empty()) {
    390         next = NULL;
    391       } else {
    392         next = source_stack.back();
    393         source_stack.pop_back();
    394       }
    395     } else {
    396       next = insn->next;
    397     }
    398     // Only serialize "code" and "k". That's all the information we need to
    399     // compare. The rest of the information is encoded in the order of
    400     // instructions.
    401     source.append(reinterpret_cast<const char *>(&insn->code),
    402                   sizeof(insn->code));
    403     source.append(reinterpret_cast<const char *>(&insn->k),
    404                   sizeof(insn->k));
    405   }
    406 
    407   // Compile the program
    408   SandboxUnittestHelper::Program bpf;
    409   codegen->Compile(prg, &bpf);
    410 
    411   // Serialize the resulting BPF instructions.
    412   std::string assembly;
    413   std::vector<int> assembly_stack;
    414   for (int idx = 0; idx >= 0;) {
    415     SANDBOX_ASSERT(idx < (int)bpf.size());
    416     struct sock_filter& insn = bpf[idx];
    417     if (BPF_CLASS(insn.code) == BPF_JMP) {
    418       if (BPF_OP(insn.code) == BPF_JA) {
    419         // Do not serialize BPF_JA instructions (see above).
    420         idx += insn.k + 1;
    421         continue;
    422       } else {
    423         assembly_stack.push_back(idx + insn.jf + 1);
    424         idx += insn.jt + 1;
    425       }
    426     } else if (BPF_CLASS(insn.code) == BPF_RET) {
    427       if (assembly_stack.empty()) {
    428         idx = -1;
    429       } else {
    430         idx = assembly_stack.back();
    431         assembly_stack.pop_back();
    432       }
    433     } else {
    434       ++idx;
    435     }
    436     // Serialize the same information that we serialized before compilation.
    437     assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code));
    438     assembly.append(reinterpret_cast<char *>(&insn.k),    sizeof(insn.k));
    439   }
    440   SANDBOX_ASSERT(source == assembly);
    441 }
    442 
    443 SANDBOX_TEST(CodeGen, All) {
    444   ForAllPrograms(CompileAndCompare);
    445 }
    446 
    447 }  // namespace sandbox
    448