// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/seccomp-bpf/codegen.h"

#include <stdio.h>

#include <set>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/basicblock.h"
#include "sandbox/linux/seccomp-bpf/die.h"
#include "sandbox/linux/seccomp-bpf/instruction.h"
#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"

namespace {

// Helper function for Traverse().
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d  // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
              BPF_OP(iter->code) == BPF_JSET ? "&" :
              BPF_OP(iter->code) == BPF_JEQ ? "==" :
              BPF_OP(iter->code) == BPF_JGE ? ">=" :
              BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
              (int)iter->k,
              ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x  // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
            ? "A := -A\n" : "A := A %s 0x%x\n",
            BPF_OP(iter->code) == BPF_ADD ? "+"  :
            BPF_OP(iter->code) == BPF_SUB ? "-"  :
            BPF_OP(iter->code) == BPF_MUL ? "*"  :
            BPF_OP(iter->code) == BPF_DIV ? "/"  :
            BPF_OP(iter->code) == BPF_MOD ? "%"  :
            BPF_OP(iter->code) == BPF_OR  ? "|"  :
            BPF_OP(iter->code) == BPF_XOR ? "^"  :
            BPF_OP(iter->code) == BPF_AND ? "&"  :
            BPF_OP(iter->code) == BPF_LSH ? "<<" :
            BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
            (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
}

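// Illustrative output sketch (not compiled; the values are hypothetical):
// for a tiny policy that loads the system call number and allows call 0x3c
// while killing everything else, PrintProgram() would emit roughly:
//
//     0) LOAD 0  // System call number
//     1) if A == 0x3c; then JMP 2 else JMP 3
//     2) RET 0x7fff0000  // Allowed
//     3) RET 0x0  // ???
//
// The "???" on the last line is expected: SECCOMP_RET_KILL has no dedicated
// case in the printer above, so it falls through to the catch-all.
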
Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // JoinInstructions() later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target, only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt || !jf) {
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}

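// Usage sketch (hypothetical policy fragment): instruction graphs are built
// back-to-front, since each instruction must know its successor(s) when it
// is created. Building the example program from the PrintProgram() sketch
// above:
//
//   CodeGen gen;
//   Instruction* allow =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW, NULL);
//   Instruction* kill =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_KILL, NULL);
//   Instruction* check =
//       gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0x3c, allow, kill);
//   Instruction* head =
//       gen.MakeInstruction(BPF_LD + BPF_W + BPF_ABS,
//                           offsetof(struct arch_seccomp_data, nr), check);
//
// All Instruction objects remain owned by the CodeGen instance and are
// freed in its destructor.
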
void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}

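// Example callback (illustrative): Traverse() can drive any per-instruction
// analysis, e.g. counting the reachable instructions:
//
//   static void CountInsn(Instruction* /* insn */, void* aux) {
//     ++*static_cast<int*>(aux);
//   }
//
//   int count = 0;
//   gen.Traverse(head, CountInsn, &count);
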
void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // We have already seen the "true" branch; the backtracking loop
        // below will pick up any pending "false" branches.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
}

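// Worked example (using the hypothetical graph from the MakeInstruction
// sketch above): after FindBranchTargets(*head, &branch_targets),
//
//   branch_targets[allow] == 1  // reached via the "true" edge of "check"
//   branch_targets[kill]  == 1  // reached via the "false" edge of "check"
//
// and "head" itself has no entry, because no jump targets it. Instructions
// targeted by several jumps end up with correspondingly higher counts.
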
BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block, which means
      // flushing the previous basic block first.
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction; this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}

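// Continuing the worked example: cutting the hypothetical graph produces
// three basic blocks: { head, check } (returned as the first block),
// { allow }, and { kill }. "basic_blocks" maps the first instruction of
// each block to its BasicBlock object.
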
// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal code
      // and k fields) and their last instructions are neither a JMP nor a RET
      // (which is the only way we can reach this point), then we must compare
      // their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non-jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      // Compare explicitly instead of subtracting: "k" is a uint32_t, and
      // converting the unsigned difference to a signed int can flip the
      // sign, yielding an inconsistent ordering.
      return (insn1.k < insn2.k) ? -1 : 1;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by definition
    // it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields.  Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}

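// Effect in practice (hypothetical blocks): if two distinct BasicBlock
// objects each consist of the single instruction "RET SECCOMP_RET_ALLOW",
// PointerCompare() returns 0 for them even though the pointers differ.
// MergeTails() below relies on exactly this property to coalesce duplicated
// return sequences.
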
void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally makes blocks with identical tails of
  // instructions map to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  //      a basic block. In practice, this is sufficiently rare that we don't
  //      incur a big cost.
  //      Similarly, we currently don't merge anything other than tails. In
  //      the future, we might decide to revisit this decision and attempt to
  //      merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}

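// Concretely (illustrative): a policy that allows many system calls tends
// to generate one "RET SECCOMP_RET_ALLOW" block per comparison chain. After
// MergeTails(), every entry in "blocks" that used to point at one of the
// duplicates points at a single shared block; the orphaned duplicates stay
// in basic_blocks_ and are freed by the CodeGen destructor.
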
void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}

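// For the running example, the computed in-degrees would be
//
//   { head, check } -> 1   // the initial call counts as one incoming edge
//   { allow }       -> 1
//   { kill }        -> 1
//
// TopoSortBasicBlocks() below decrements these counts as it removes
// outgoing edges, scheduling a block as soon as its count drops to zero.
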
void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
  // ends up making our descendants eligible for the sorted list. The
  // sorting algorithm terminates when there are no more basic blocks that have
  // no incoming branches. If we didn't move all blocks from the set of
  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
  // been a cyclic dependency. This should never happen in a BPF program, as
  // well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move the block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect the last instruction in the basic block. This is typically
    // either a jump or a return statement. But it could also be a "normal"
    // instruction that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}

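// For the running example, one valid output order is
//   { head, check }, { allow }, { kill }
// Note the fall-through fix-up above: when a block ending in a "normal"
// instruction cannot be placed directly before its successor, a BPF_JA is
// appended so that the emitted code still reaches that successor.
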
void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means that iterating over the basic blocks in reverse
  // makes it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32-bit "k" field for the offset, instead
        // of the 8-bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
}

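// Worked arithmetic (for the running example, with blocks sorted as
// { head, check }, { allow }, { kill }): iterating in reverse, { kill }
// gets offset 1 and { allow } gets offset 2; offsets count instructions
// from the end of the program. When the conditional jump at the end of
// { head, check } is processed, "offset" is 2, so
//
//   jt = 2 - offset({ allow }) = 2 - 2 = 0
//   jf = 2 - offset({ kill })  = 2 - 1 = 1
//
// which matches BPF semantics: jt/jf are relative to the instruction
// immediately following the jump.
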
void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
}

void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
}

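// End-to-end usage sketch (hypothetical caller; in the production sandbox
// this sequence is driven by SandboxBPF rather than called directly):
//
//   CodeGen gen;
//   Instruction* head = ...;  // built with MakeInstruction(), as sketched
//                             // earlier in this file
//   SandboxBPF::Program program;
//   gen.Compile(head, &program);
//   // "program" now holds raw sock_filter entries, ready to be handed to
//   // the kernel via prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...).
//   gen.PrintProgram(program);  // optional: dump the generated code
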
}  // namespace sandbox