// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stddef.h>
#include <stdio.h>

#include <set>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/codegen.h"

namespace {

// Helper function for Traverse().
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d  // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
              BPF_OP(iter->code) == BPF_JSET ? "&" :
              BPF_OP(iter->code) == BPF_JEQ ? "==" :
              BPF_OP(iter->code) == BPF_JGE ? ">=" :
              BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
              (int)iter->k,
              ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x  // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
            ? "A := -A\n" : "A := A %s 0x%x\n",
            BPF_OP(iter->code) == BPF_ADD ? "+"  :
            BPF_OP(iter->code) == BPF_SUB ? "-"  :
            BPF_OP(iter->code) == BPF_MUL ? "*"  :
            BPF_OP(iter->code) == BPF_DIV ? "/"  :
            BPF_OP(iter->code) == BPF_MOD ? "%"  :
            BPF_OP(iter->code) == BPF_OR  ? "|"  :
            BPF_OP(iter->code) == BPF_XOR ? "^"  :
            BPF_OP(iter->code) == BPF_AND ? "&"  :
            BPF_OP(iter->code) == BPF_LSH ? "<<" :
            BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
            (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
}
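
// Example output (illustrative): for a two-instruction program that loads
// the system call number and then allows the call, PrintProgram() writes
// roughly the following to stderr:
//
//     0) LOAD 0  // System call number
//     1) RET 0x7fff0000  // Allowed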

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "JoinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use only the "true" branch target.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}
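
// Example (illustrative): building a minimal two-instruction chain with a
// hypothetical CodeGen instance "gen". The RET instruction is created first
// and then used as the "next" pointer of the LOAD instruction.
//
//   Instruction* ret = gen.MakeInstruction(BPF_RET + BPF_K,
//                                          SECCOMP_RET_ALLOW, NULL);
//   Instruction* load = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS,
//       offsetof(struct arch_seccomp_data, nr), ret);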

Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
  if (BPF_CLASS(code) != BPF_RET) {
    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
  }
  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
      err.error_type_ != ErrorCode::ET_TRAP) {
    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
  }
  return MakeInstruction(code, err.err_);
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt && !jf) {
    // Callers may leave at most one of the two branch targets unset; that
    // target must then be provided later by calling "JoinInstructions".
    // Leaving both targets unset is always an error.
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}

void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
  // Merge two instructions, or set the branch target for an "always" jump.
  // This function should be called if the caller did not initially provide
  // a value for "next" when creating the instruction.
  if (BPF_CLASS(head->code) == BPF_JMP) {
    if (BPF_OP(head->code) == BPF_JA) {
      if (head->jt_ptr) {
        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
      }
      head->jt_ptr = tail;
    } else {
      if (!head->jt_ptr && head->jf_ptr) {
        head->jt_ptr = tail;
      } else if (!head->jf_ptr && head->jt_ptr) {
        head->jf_ptr = tail;
      } else {
        SANDBOX_DIE("Cannot append instructions after a jump");
      }
    }
  } else if (BPF_CLASS(head->code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  } else if (head->next) {
    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
  } else {
    head->next = tail;
  }
}
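
// Example (illustrative): deferring one branch target of a conditional jump
// and supplying it later. "gen", "__NR_getpid", and all other identifiers
// are hypothetical caller-side code.
//
//   Instruction* allowed = gen.MakeInstruction(BPF_RET + BPF_K,
//                                              SECCOMP_RET_ALLOW, NULL);
//   Instruction* trapped = gen.MakeInstruction(BPF_RET + BPF_K,
//                                              SECCOMP_RET_TRAP, NULL);
//   // Create the jump with only the "true" branch; the "false" branch is
//   // deferred.
//   Instruction* jump = gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K,
//                                           __NR_getpid, allowed, NULL);
//   gen.JoinInstructions(jump, trapped);  // Fills in the "false" branch.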

void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}
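
// Example (illustrative): counting the reachable instructions with
// Traverse(). The callback and its "aux" payload are hypothetical
// caller-side code.
//
//   static void CountInstruction(Instruction* /*insn*/, void* aux) {
//     ++*static_cast<int*>(aux);
//   }
//
//   int count = 0;
//   gen.Traverse(head, CountInstruction, &count);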

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Continue the depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // We have already seen the "true" branch; fall through to the
        // backtracking loop below, which picks up any pending "false"
        // branches.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction; just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction, as return instructions are the only instructions that
      // are allowed to (and must) lack a "next" pointer.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node; backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch; continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
}
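
// Illustration (hypothetical three-instruction graph): for
//
//   JEQ --true--> RET_A
//     \---false-> LD ---> RET_B
//
// FindBranchTargets() records one incoming edge for RET_A and one for the
// LD instruction. RET_B is only reached by falling through from LD, so it
// is not a branch target and never appears in the map.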

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction; this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction; append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement; flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}
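
// Illustration (hypothetical): for the instruction graph
//
//   LD -> JEQ --true--> RET_A
//           \---false-> RET_B
//
// CutGraphIntoBasicBlocks() produces three blocks: {LD, JEQ}, {RET_A}, and
// {RET_B}. The LD and JEQ share a block because nothing jumps to the JEQ
// directly; the two RETs each start a block because they are branch targets.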

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal code
      // and k fields) and their last instructions are neither a JMP nor a RET
      // (which is the only way we can reach this point), then we must compare
      // their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non-jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      // "k" is an unsigned 32-bit quantity; returning the raw difference
      // could wrap around and yield an inconsistent ordering. Compare
      // explicitly instead.
      return insn1.k < insn2.k ? -1 : 1;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by definition
    // it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields.  Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally causes blocks with identical tails of
  // instructions to map to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  //      a basic block. In practice, this is sufficiently rare that we don't
  //      incur a big cost.
  //      Similarly, we currently don't merge anything other than tails. In
  //      the future, we might decide to revisit this decision and attempt to
  //      merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}
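
// Illustration (hypothetical): if two different predecessors each branch to
// their own copy of an identical "RET SECCOMP_RET_TRAP" block,
// PointerCompare() orders the two copies as equal, so the second lookup in
// "seen_basic_blocks" hits the first copy and both map entries end up
// pointing at a single shared block.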

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
  // ends up making our descendants eligible for the sorted list. The
  // sorting algorithm terminates when there are no more basic blocks that have
  // no incoming branches. If we didn't move all blocks from the set of
  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
  // been a cyclic dependency. This should never happen in a BPF program, as
  // well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move the current block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect the last instruction in the basic block. This is typically
    // either a jump or a return statement. But it could also be a "normal"
    // instruction that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}
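
// Illustration (hypothetical): suppose block A falls through to block C,
// but C also has an incoming jump from a block B that has not been sorted
// yet. C then cannot be emitted directly after A, so the sorter appends a
// "JMP C" trampoline to A; the fall-through edge survives whatever final
// ordering is eventually chosen.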

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means that iterating over the basic blocks in reverse
  // makes it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32-bit "k" field for the offset, instead
        // of the 8-bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        // N.B. The "jt == 255 && jf > 255" case matters because inserting a
        // trampoline for "jf" below increments "jt" by one, which would push
        // it past the 255-instruction limit.
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to the next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
}
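
// Worked example (hypothetical): consider three blocks emitted in the order
// B0 = {LD, JEQ}, B1 = {RET}, B2 = {RET}, where the JEQ branches to B1 and
// B2. Iterating in reverse, B2 gets offset 1 and B1 gets offset 2, so the
// JEQ in B0 is resolved as jt = 2 - 2 = 0 (fall through to B1) and
// jf = 2 - 1 = 1 (skip one instruction to reach B2). In BPF, a conditional
// jump transfers control jt/jf + 1 slots ahead, so distance 0 means the
// immediately following instruction.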

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
}

void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
}
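
// End-to-end example (illustrative; "gen", "__NR_getpid", and the instruction
// graph are hypothetical caller-side code): allow getpid(), trap everything
// else. A real policy would also validate the architecture first.
//
//   CodeGen gen;
//   Instruction* allowed = gen.MakeInstruction(BPF_RET + BPF_K,
//                                              SECCOMP_RET_ALLOW, NULL);
//   Instruction* trapped = gen.MakeInstruction(BPF_RET + BPF_K,
//                                              SECCOMP_RET_TRAP, NULL);
//   Instruction* check = gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K,
//                                            __NR_getpid, allowed, trapped);
//   Instruction* head = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS,
//       offsetof(struct arch_seccomp_data, nr), check);
//
//   SandboxBPF::Program program;
//   gen.Compile(head, &program);
//   // "program" now holds sock_filter structs, ready to be loaded into the
//   // kernel (e.g., with prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...)).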

}  // namespace sandbox