Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <stdio.h>
      6 
      7 #include "sandbox/linux/seccomp-bpf/codegen.h"
      8 
      9 
     10 namespace {
     11 
     12 // Helper function for Traverse().
     13 void TraverseRecursively(std::set<playground2::Instruction *> *visited,
     14                          playground2::Instruction *instruction) {
     15   if (visited->find(instruction) == visited->end()) {
     16     visited->insert(instruction);
     17     switch (BPF_CLASS(instruction->code)) {
     18     case BPF_JMP:
     19       if (BPF_OP(instruction->code) != BPF_JA) {
     20         TraverseRecursively(visited, instruction->jf_ptr);
     21       }
     22       TraverseRecursively(visited, instruction->jt_ptr);
     23       break;
     24     case BPF_RET:
     25       break;
     26     default:
     27       TraverseRecursively(visited, instruction->next);
     28       break;
     29     }
     30   }
     31 }
     32 
     33 }  // namespace
     34 
     35 namespace playground2 {
     36 
     37 CodeGen::CodeGen()
     38     : compiled_(false) {
     39 }
     40 
     41 CodeGen::~CodeGen() {
     42   for (Instructions::iterator iter = instructions_.begin();
     43        iter != instructions_.end();
     44        ++iter) {
     45     delete *iter;
     46   }
     47   for (BasicBlocks::iterator iter = basic_blocks_.begin();
     48        iter != basic_blocks_.end();
     49        ++iter) {
     50     delete *iter;
     51   }
     52 }
     53 
     54 void CodeGen::PrintProgram(const Sandbox::Program& program) {
     55   for (Sandbox::Program::const_iterator iter = program.begin();
     56        iter != program.end();
     57        ++iter) {
     58     int ip = (int)(iter - program.begin());
     59     fprintf(stderr, "%3d) ", ip);
     60     switch (BPF_CLASS(iter->code)) {
     61     case BPF_LD:
     62       if (iter->code == BPF_LD+BPF_W+BPF_ABS) {
     63         fprintf(stderr, "LOAD %d  // ", (int)iter->k);
     64         if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
     65           fprintf(stderr, "System call number\n");
     66         } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
     67           fprintf(stderr, "Architecture\n");
     68         } else if (iter->k == offsetof(struct arch_seccomp_data,
     69                                        instruction_pointer)) {
     70           fprintf(stderr, "Instruction pointer (LSB)\n");
     71         } else if (iter->k == offsetof(struct arch_seccomp_data,
     72                                        instruction_pointer) + 4) {
     73           fprintf(stderr, "Instruction pointer (MSB)\n");
     74         } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
     75                    iter->k <  offsetof(struct arch_seccomp_data, args)+48 &&
     76                    (iter->k-offsetof(struct arch_seccomp_data, args))%4 == 0) {
     77           fprintf(stderr, "Argument %d (%cSB)\n",
     78                   (int)(iter->k-offsetof(struct arch_seccomp_data, args))/8,
     79                   (iter->k-offsetof(struct arch_seccomp_data,
     80                                     args))%8 ? 'M' : 'L');
     81         } else {
     82           fprintf(stderr, "???\n");
     83         }
     84       } else {
     85         fprintf(stderr, "LOAD ???\n");
     86       }
     87       break;
     88     case BPF_JMP:
     89       if (BPF_OP(iter->code) == BPF_JA) {
     90         fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
     91       } else {
     92         fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
     93                 BPF_OP(iter->code) == BPF_JSET ? "&" :
     94                 BPF_OP(iter->code) == BPF_JEQ ? "==" :
     95                 BPF_OP(iter->code) == BPF_JGE ? ">=" :
     96                 BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
     97                 (int)iter->k,
     98                 ip + iter->jt + 1, ip + iter->jf + 1);
     99       }
    100       break;
    101     case BPF_RET:
    102       fprintf(stderr, "RET 0x%x  // ", iter->k);
    103       if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
    104         fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
    105       } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
    106         fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
    107       } else if (iter->k == SECCOMP_RET_ALLOW) {
    108         fprintf(stderr, "Allowed\n");
    109       } else {
    110         fprintf(stderr, "???\n");
    111       }
    112       break;
    113     case BPF_ALU:
    114       fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
    115               ? "A := -A\n" : "A := A %s 0x%x\n",
    116               BPF_OP(iter->code) == BPF_ADD ? "+"  :
    117               BPF_OP(iter->code) == BPF_SUB ? "-"  :
    118               BPF_OP(iter->code) == BPF_MUL ? "*"  :
    119               BPF_OP(iter->code) == BPF_DIV ? "/"  :
    120               BPF_OP(iter->code) == BPF_MOD ? "%"  :
    121               BPF_OP(iter->code) == BPF_OR  ? "|"  :
    122               BPF_OP(iter->code) == BPF_XOR ? "^"  :
    123               BPF_OP(iter->code) == BPF_AND ? "&"  :
    124               BPF_OP(iter->code) == BPF_LSH ? "<<" :
    125               BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
    126               (int)iter->k);
    127       break;
    128     default:
    129       fprintf(stderr, "???\n");
    130       break;
    131     }
    132   }
    133   return;
    134 }
    135 
    136 Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
    137                                       Instruction *next) {
    138   // We can handle non-jumping instructions and "always" jumps. Both of
    139   // them are followed by exactly one "next" instruction.
    140   // We allow callers to defer specifying "next", but then they must call
    141   // "joinInstructions" later.
    142   if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    143     SANDBOX_DIE("Must provide both \"true\" and \"false\" branch "
    144                 "for a BPF_JMP");
    145   }
    146   if (next && BPF_CLASS(code) == BPF_RET) {
    147     SANDBOX_DIE("Cannot append instructions after a return statement");
    148   }
    149   if (BPF_CLASS(code) == BPF_JMP) {
    150     // "Always" jumps use the "true" branch target, only.
    151     Instruction *insn = new Instruction(code, 0, next, NULL);
    152     instructions_.push_back(insn);
    153     return insn;
    154   } else {
    155     // Non-jumping instructions do not use any of the branch targets.
    156     Instruction *insn = new Instruction(code, k, next);
    157     instructions_.push_back(insn);
    158     return insn;
    159   }
    160 }
    161 
    162 Instruction *CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
    163   if (BPF_CLASS(code) != BPF_RET) {
    164     SANDBOX_DIE("ErrorCodes can only be used in return expressions");
    165   }
    166   if (err.error_type_ != ErrorCode::ET_SIMPLE &&
    167       err.error_type_ != ErrorCode::ET_TRAP) {
    168     SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
    169   }
    170   return MakeInstruction(code, err.err_);
    171 }
    172 
    173 Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
    174                                       Instruction *jt, Instruction *jf) {
    175   // We can handle all conditional jumps. They are followed by both a
    176   // "true" and a "false" branch.
    177   if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    178     SANDBOX_DIE("Expected a BPF_JMP instruction");
    179   }
    180   if (!jt && !jf) {
    181     // We allow callers to defer specifying exactly one of the branch
    182     // targets. It must then be set later by calling "JoinInstructions".
    183     SANDBOX_DIE("Branches must jump to a valid instruction");
    184   }
    185   Instruction *insn = new Instruction(code, k, jt, jf);
    186   instructions_.push_back(insn);
    187   return insn;
    188 }
    189 
    190 void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) {
    191   // Merge two instructions, or set the branch target for an "always" jump.
    192   // This function should be called, if the caller didn't initially provide
    193   // a value for "next" when creating the instruction.
    194   if (BPF_CLASS(head->code) == BPF_JMP) {
    195     if (BPF_OP(head->code) == BPF_JA) {
    196       if (head->jt_ptr) {
    197         SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
    198       }
    199       head->jt_ptr = tail;
    200     } else {
    201       if (!head->jt_ptr && head->jf_ptr) {
    202         head->jt_ptr = tail;
    203       } else if (!head->jf_ptr && head->jt_ptr) {
    204         head->jf_ptr = tail;
    205       } else {
    206         SANDBOX_DIE("Cannot append instructions after a jump");
    207       }
    208     }
    209   } else if (BPF_CLASS(head->code) == BPF_RET) {
    210     SANDBOX_DIE("Cannot append instructions after a return statement");
    211   } else if (head->next) {
    212     SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
    213   } else {
    214     head->next = tail;
    215   }
    216   return;
    217 }
    218 
    219 void CodeGen::Traverse(Instruction *instruction,
    220                        void (*fnc)(Instruction *, void *), void *aux) {
    221   std::set<Instruction *> visited;
    222   TraverseRecursively(&visited, instruction);
    223   for (std::set<Instruction *>::const_iterator iter = visited.begin();
    224        iter != visited.end();
    225        ++iter) {
    226     fnc(*iter, aux);
    227   }
    228 }
    229 
    230 void CodeGen::FindBranchTargets(const Instruction& instructions,
    231                                 BranchTargets *branch_targets) {
    232   // Follow all possible paths through the "instructions" graph and compute
    233   // a list of branch targets. This will later be needed to compute the
    234   // boundaries of basic blocks.
    235   // We maintain a set of all instructions that we have previously seen. This
    236   // set ultimately converges on all instructions in the program.
    237   std::set<const Instruction *> seen_instructions;
    238   Instructions stack;
    239   for (const Instruction *insn = &instructions; insn; ) {
    240     seen_instructions.insert(insn);
    241     if (BPF_CLASS(insn->code) == BPF_JMP) {
    242       // Found a jump. Increase count of incoming edges for each of the jump
    243       // targets.
    244       ++(*branch_targets)[insn->jt_ptr];
    245       if (BPF_OP(insn->code) != BPF_JA) {
    246         ++(*branch_targets)[insn->jf_ptr];
    247         stack.push_back(const_cast<Instruction *>(insn));
    248       }
    249       // Start a recursive decent for depth-first traversal.
    250       if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
    251         // We haven't seen the "true" branch yet. Traverse it now. We have
    252         // already remembered the "false" branch on the stack and will
    253         // traverse it later.
    254         insn = insn->jt_ptr;
    255         continue;
    256       } else {
    257         // Now try traversing the "false" branch.
    258         insn = NULL;
    259       }
    260     } else {
    261       // This is a non-jump instruction, just continue to the next instruction
    262       // (if any). It's OK if "insn" becomes NULL when reaching a return
    263       // instruction.
    264       if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
    265         SANDBOX_DIE("Internal compiler error; return instruction must be at "
    266                     "the end of the BPF program");
    267       }
    268       if (seen_instructions.find(insn->next) == seen_instructions.end()) {
    269         insn = insn->next;
    270       } else {
    271         // We have seen this instruction before. That could happen if it is
    272         // a branch target. No need to continue processing.
    273         insn = NULL;
    274       }
    275     }
    276     while (!insn && !stack.empty()) {
    277       // We are done processing all the way to a leaf node, backtrack up the
    278       // stack to any branches that we haven't processed yet. By definition,
    279       // this has to be a "false" branch, as we always process the "true"
    280       // branches right away.
    281       insn = stack.back();
    282       stack.pop_back();
    283       if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
    284         // We haven't seen the "false" branch yet. So, that's where we'll
    285         // go now.
    286         insn = insn->jf_ptr;
    287       } else {
    288         // We have seen both the "true" and the "false" branch, continue
    289         // up the stack.
    290         if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
    291           SANDBOX_DIE("Internal compiler error; cannot find all "
    292                       "branch targets");
    293         }
    294         insn = NULL;
    295       }
    296     }
    297   }
    298   return;
    299 }
    300 
    301 BasicBlock *CodeGen::MakeBasicBlock(Instruction *head,
    302                                     Instruction *tail) {
    303   // Iterate over all the instructions between "head" and "tail" and
    304   // insert them into a new basic block.
    305   BasicBlock *bb = new BasicBlock;
    306   for (;; head = head->next) {
    307     bb->instructions.push_back(head);
    308     if (head == tail) {
    309       break;
    310     }
    311     if (BPF_CLASS(head->code) == BPF_JMP) {
    312       SANDBOX_DIE("Found a jump inside of a basic block");
    313     }
    314   }
    315   basic_blocks_.push_back(bb);
    316   return bb;
    317 }
    318 
    319 void CodeGen::AddBasicBlock(Instruction *head,
    320                             Instruction *tail,
    321                             const BranchTargets& branch_targets,
    322                             TargetsToBlocks *basic_blocks,
    323                             BasicBlock **firstBlock) {
    324   // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
    325   // has not been set before.
    326   BranchTargets::const_iterator iter = branch_targets.find(head);
    327   if ((iter == branch_targets.end()) != !*firstBlock ||
    328       !*firstBlock != basic_blocks->empty()) {
    329     SANDBOX_DIE("Only the very first basic block should have no "
    330                 "incoming jumps");
    331   }
    332   BasicBlock *bb = MakeBasicBlock(head, tail);
    333   if (!*firstBlock) {
    334     *firstBlock = bb;
    335   }
    336   (*basic_blocks)[head] = bb;
    337   return;
    338 }
    339 
    340 BasicBlock *CodeGen::CutGraphIntoBasicBlocks(
    341     Instruction *instructions, const BranchTargets& branch_targets,
    342     TargetsToBlocks *basic_blocks) {
    343   // Textbook implementation of a basic block generator. All basic blocks
    344   // start with a branch target and end with either a return statement or
    345   // a jump (or are followed by an instruction that forms the beginning of a
    346   // new block). Both conditional and "always" jumps are supported.
    347   BasicBlock *first_block = NULL;
    348   std::set<const Instruction *> seen_instructions;
    349   Instructions stack;
    350   Instruction *tail = NULL;
    351   Instruction *head = instructions;
    352   for (Instruction *insn = head; insn; ) {
    353     if (seen_instructions.find(insn) != seen_instructions.end()) {
    354       // We somehow went in a circle. This should never be possible. Not even
    355       // cyclic graphs are supposed to confuse us this much.
    356       SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    357     }
    358     seen_instructions.insert(insn);
    359     if (tail && branch_targets.find(insn) != branch_targets.end()) {
    360       // We reached a branch target. Start a new basic block (this means,
    361       // flushing the previous basic block first).
    362       AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
    363       head = insn;
    364     }
    365     if (BPF_CLASS(insn->code) == BPF_JMP) {
    366       // We reached a jump instruction, this completes our current basic
    367       // block. Flush it and continue by traversing both the true and the
    368       // false branch of the jump. We need to maintain a stack to do so.
    369       AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
    370       if (BPF_OP(insn->code) != BPF_JA) {
    371         stack.push_back(insn->jf_ptr);
    372       }
    373       insn = insn->jt_ptr;
    374 
    375       // If we are jumping to an instruction that we have previously
    376       // processed, we are done with this branch. Continue by backtracking
    377       // up the stack.
    378       while (seen_instructions.find(insn) != seen_instructions.end()) {
    379       backtracking:
    380         if (stack.empty()) {
    381           // We successfully traversed all reachable instructions.
    382           return first_block;
    383         } else {
    384           // Going up the stack.
    385           insn = stack.back();
    386           stack.pop_back();
    387         }
    388       }
    389       // Starting a new basic block.
    390       tail = NULL;
    391       head = insn;
    392     } else {
    393       // We found a non-jumping instruction, append it to current basic
    394       // block.
    395       tail = insn;
    396       insn = insn->next;
    397       if (!insn) {
    398         // We reached a return statement, flush the current basic block and
    399         // backtrack up the stack.
    400         AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
    401         goto backtracking;
    402       }
    403     }
    404   }
    405   return first_block;
    406 }
    407 
    408 // We define a comparator that inspects the sequence of instructions in our
    409 // basic block and any blocks referenced by this block. This function can be
    410 // used in a "less" comparator for the purpose of storing pointers to basic
    411 // blocks in STL containers; this gives an easy option to use STL to find
    412 // shared tail  sequences of basic blocks.
    413 static int PointerCompare(const BasicBlock *block1, const BasicBlock *block2,
    414                           const TargetsToBlocks& blocks) {
    415   // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
    416   // If we are looking at the exact same block, this is trivial and we don't
    417   // need to do a full comparison.
    418   if (block1 == block2) {
    419     return 0;
    420   }
    421 
    422   // We compare the sequence of instructions in both basic blocks.
    423   const Instructions& insns1 = block1->instructions;
    424   const Instructions& insns2 = block2->instructions;
    425   Instructions::const_iterator iter1 = insns1.begin();
    426   Instructions::const_iterator iter2 = insns2.begin();
    427   for (;; ++iter1, ++iter2) {
    428     // If we have reached the end of the sequence of instructions in one or
    429     // both basic blocks, we know the relative ordering between the two blocks
    430     // and can return.
    431     if (iter1 == insns1.end()) {
    432       return iter2 == insns2.end() ? 0 : -1;
    433     } else if (iter2 == insns2.end()) {
    434       return 1;
    435     }
    436 
    437     // Compare the individual fields for both instructions.
    438     const Instruction& insn1 = **iter1;
    439     const Instruction& insn2 = **iter2;
    440     if (insn1.code == insn2.code) {
    441       if (insn1.k == insn2.k) {
    442         // Only conditional jump instructions use the jt_ptr and jf_ptr
    443         // fields.
    444         if (BPF_CLASS(insn1.code) == BPF_JMP) {
    445           if (BPF_OP(insn1.code) != BPF_JA) {
    446             // Recursively compare the "true" and "false" branches.
    447             // A well-formed BPF program can't have any cycles, so we know
    448             // that our recursive algorithm will ultimately terminate.
    449             // In the unlikely event that the programmer made a mistake and
    450             // went out of the way to give us a cyclic program, we will crash
    451             // with a stack overflow. We are OK with that.
    452             int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
    453                                    blocks.find(insn2.jt_ptr)->second,
    454                                    blocks);
    455             if (c == 0) {
    456               c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
    457                                  blocks.find(insn2.jf_ptr)->second,
    458                                  blocks);
    459               if (c == 0) {
    460                 continue;
    461               } else {
    462                 return c;
    463               }
    464             } else {
    465               return c;
    466             }
    467           } else {
    468             int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
    469                                    blocks.find(insn2.jt_ptr)->second,
    470                                    blocks);
    471             if (c == 0) {
    472               continue;
    473             } else {
    474               return c;
    475             }
    476           }
    477         } else {
    478           continue;
    479         }
    480       } else {
    481         return insn1.k - insn2.k;
    482       }
    483     } else {
    484       return insn1.code - insn2.code;
    485     }
    486   }
    487 }
    488 
    489 void CodeGen::MergeTails(TargetsToBlocks *blocks) {
    490   // We enter all of our basic blocks into a set using the BasicBlock::Less()
    491   // comparator. This naturally results in blocks with identical tails of
    492   // instructions to map to the same entry in the set. Whenever we discover
    493   // that a particular chain of instructions is already in the set, we merge
    494   // the basic blocks and update the pointer in the "blocks" map.
    495   // Returns the number of unique basic blocks.
    496   // N.B. We don't merge instructions on a granularity that is finer than
    497   //      a basic block. In practice, this is sufficiently rare that we don't
    498   //      incur a big cost.
    499   //      Similarly, we currently don't merge anything other than tails. In
    500   //      the future, we might decide to revisit this decision and attempt to
    501   //      merge arbitrary sub-sequences of instructions.
    502   BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
    503   typedef std::set<BasicBlock *, BasicBlock::Less<TargetsToBlocks> > Set;
    504   Set seen_basic_blocks(less);
    505   for (TargetsToBlocks::iterator iter = blocks->begin();
    506        iter != blocks->end();
    507        ++iter) {
    508     BasicBlock *bb = iter->second;
    509     Set::const_iterator entry = seen_basic_blocks.find(bb);
    510     if (entry == seen_basic_blocks.end()) {
    511       // This is the first time we see this particular sequence of
    512       // instructions. Enter the basic block into the set of known
    513       // basic blocks.
    514       seen_basic_blocks.insert(bb);
    515     } else {
    516       // We have previously seen another basic block that defines the same
    517       // sequence of instructions. Merge the two blocks and update the
    518       // pointer in the "blocks" map.
    519       iter->second = *entry;
    520     }
    521   }
    522 }
    523 
    524 void CodeGen::ComputeIncomingBranches(BasicBlock *block,
    525                                       const TargetsToBlocks& targets_to_blocks,
    526                                       IncomingBranches *incoming_branches) {
    527   // We increment the number of incoming branches each time we encounter a
    528   // basic block. But we only traverse recursively the very first time we
    529   // encounter a new block. This is necessary to make topological sorting
    530   // work correctly.
    531   if (++(*incoming_branches)[block] == 1) {
    532     Instruction *last_insn = block->instructions.back();
    533     if (BPF_CLASS(last_insn->code) == BPF_JMP) {
    534       ComputeIncomingBranches(
    535                              targets_to_blocks.find(last_insn->jt_ptr)->second,
    536                              targets_to_blocks, incoming_branches);
    537       if (BPF_OP(last_insn->code) != BPF_JA) {
    538         ComputeIncomingBranches(
    539                              targets_to_blocks.find(last_insn->jf_ptr)->second,
    540                              targets_to_blocks, incoming_branches);
    541       }
    542     } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
    543       ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
    544                               targets_to_blocks, incoming_branches);
    545     }
    546   }
    547 }
    548 
    549 void CodeGen::TopoSortBasicBlocks(BasicBlock *first_block,
    550                                   const TargetsToBlocks& blocks,
    551                                   BasicBlocks *basic_blocks) {
    552   // Textbook implementation of a toposort. We keep looking for basic blocks
    553   // that don't have any incoming branches (initially, this is just the
    554   // "first_block") and add them to the topologically sorted list of
    555   // "basic_blocks". As we do so, we remove outgoing branches. This potentially
    556   // ends up making our descendants eligible for the sorted list. The
    557   // sorting algorithm terminates when there are no more basic blocks that have
    558   // no incoming branches. If we didn't move all blocks from the set of
    559   // "unordered_blocks" to the sorted list of "basic_blocks", there must have
    560   // been a cyclic dependency. This should never happen in a BPF program, as
    561   // well-formed BPF programs only ever have forward branches.
    562   IncomingBranches unordered_blocks;
    563   ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
    564 
    565   std::set<BasicBlock *> heads;
    566   for (;;) {
    567     // Move block from "unordered_blocks" to "basic_blocks".
    568     basic_blocks->push_back(first_block);
    569 
    570     // Inspect last instruction in the basic block. This is typically either a
    571     // jump or a return statement. But it could also be a "normal" instruction
    572     // that is followed by a jump target.
    573     Instruction *last_insn = first_block->instructions.back();
    574     if (BPF_CLASS(last_insn->code) == BPF_JMP) {
    575       // Remove outgoing branches. This might end up moving our descendants
    576       // into set of "head" nodes that no longer have any incoming branches.
    577       TargetsToBlocks::const_iterator iter;
    578       if (BPF_OP(last_insn->code) != BPF_JA) {
    579         iter = blocks.find(last_insn->jf_ptr);
    580         if (!--unordered_blocks[iter->second]) {
    581           heads.insert(iter->second);
    582         }
    583       }
    584       iter = blocks.find(last_insn->jt_ptr);
    585       if (!--unordered_blocks[iter->second]) {
    586         first_block = iter->second;
    587         continue;
    588       }
    589     } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
    590       // We encountered an instruction that doesn't change code flow. Try to
    591       // pick the next "first_block" from "last_insn->next", if possible.
    592       TargetsToBlocks::const_iterator iter;
    593       iter = blocks.find(last_insn->next);
    594       if (!--unordered_blocks[iter->second]) {
    595         first_block = iter->second;
    596         continue;
    597       } else {
    598         // Our basic block is supposed to be followed by "last_insn->next",
    599         // but dependencies prevent this from happening. Insert a BPF_JA
    600         // instruction to correct the code flow.
    601         Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, last_insn->next);
    602         first_block->instructions.push_back(ja);
    603         last_insn->next = ja;
    604       }
    605     }
    606     if (heads.empty()) {
    607       if (unordered_blocks.size() != basic_blocks->size()) {
    608         SANDBOX_DIE("Internal compiler error; cyclic graph detected");
    609       }
    610       return;
    611     }
    612     // Proceed by picking an arbitrary node from the set of basic blocks that
    613     // do not have any incoming branches.
    614     first_block = *heads.begin();
    615     heads.erase(heads.begin());
    616   }
    617 }
    618 
    619 void CodeGen::ComputeRelativeJumps(BasicBlocks *basic_blocks,
    620                                    const TargetsToBlocks& targets_to_blocks) {
    621   // While we previously used pointers in jt_ptr and jf_ptr to link jump
    622   // instructions to their targets, we now convert these jumps to relative
    623   // jumps that are suitable for loading the BPF program into the kernel.
    624   int offset = 0;
    625 
    626   // Since we just completed a toposort, all jump targets are guaranteed to
    627   // go forward. This means, iterating over the basic blocks in reverse makes
    628   // it trivial to compute the correct offsets.
    629   BasicBlock *bb = NULL;
    630   BasicBlock *last_bb = NULL;
    631   for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
    632        iter != basic_blocks->rend();
    633        ++iter) {
    634     last_bb = bb;
    635     bb = *iter;
    636     Instruction *insn = bb->instructions.back();
    637     if (BPF_CLASS(insn->code) == BPF_JMP) {
    638       // Basic block ended in a jump instruction. We can now compute the
    639       // appropriate offsets.
    640       if (BPF_OP(insn->code) == BPF_JA) {
    641         // "Always" jumps use the 32bit "k" field for the offset, instead
    642         // of the 8bit "jt" and "jf" fields.
    643         int jmp =
    644           offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
    645         insn->k  = jmp;
    646         insn->jt = insn->jf = 0;
    647       } else {
    648         // The offset computations for conditional jumps are just the same
    649         // as for "always" jumps.
    650         int jt = offset-targets_to_blocks.find(insn->jt_ptr)->second->offset;
    651         int jf = offset-targets_to_blocks.find(insn->jf_ptr)->second->offset;
    652 
    653         // There is an added complication, because conditional relative jumps
    654         // can only jump at most 255 instructions forward. If we have to jump
    655         // further, insert an extra "always" jump.
    656         Instructions::size_type jmp = bb->instructions.size();
    657         if (jt > 255 || (jt == 255 && jf > 255)) {
    658           Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jt_ptr);
    659           bb->instructions.push_back(ja);
    660           ja->k  = jt;
    661           ja->jt = ja->jf = 0;
    662 
    663           // The newly inserted "always" jump, of course, requires us to adjust
    664           // the jump targets in the original conditional jump.
    665           jt = 0;
    666           ++jf;
    667         }
    668         if (jf > 255) {
    669           Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jf_ptr);
    670           bb->instructions.insert(bb->instructions.begin() + jmp, ja);
    671           ja->k  = jf;
    672           ja->jt = ja->jf = 0;
    673 
    674           // Again, we have to adjust the jump targets in the original
    675           // conditional jump.
    676           ++jt;
    677           jf = 0;
    678         }
    679 
    680         // Now we can finally set the relative jump targets in the conditional
    681         // jump instruction. Afterwards, we must no longer access the jt_ptr
    682         // and jf_ptr fields.
    683         insn->jt = jt;
    684         insn->jf = jf;
    685       }
    686     } else if (BPF_CLASS(insn->code) != BPF_RET &&
    687                targets_to_blocks.find(insn->next)->second != last_bb) {
    688       SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    689     }
    690 
    691     // Proceed to next basic block.
    692     offset += bb->instructions.size();
    693     bb->offset = offset;
    694   }
    695   return;
    696 }
    697 
    698 void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
    699                                      Sandbox::Program *program) {
    700   // Our basic blocks have been sorted and relative jump offsets have been
    701   // computed. The last remaining step is for all the instructions in our
    702   // basic blocks to be concatenated into a BPF program.
    703   program->clear();
    704   for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
    705        bb_iter != basic_blocks.end();
    706        ++bb_iter) {
    707     const BasicBlock& bb = **bb_iter;
    708     for (Instructions::const_iterator insn_iter = bb.instructions.begin();
    709          insn_iter != bb.instructions.end();
    710          ++insn_iter) {
    711       const Instruction& insn = **insn_iter;
    712       program->push_back(
    713         (struct sock_filter) { insn.code, insn.jt, insn.jf, insn.k });
    714     }
    715   }
    716   return;
    717 }
    718 
    719 void CodeGen::Compile(Instruction *instructions, Sandbox::Program *program) {
    720   if (compiled_) {
    721     SANDBOX_DIE("Cannot call Compile() multiple times. Create a new code "
    722                 "generator instead");
    723   }
    724   compiled_ = true;
    725 
    726   BranchTargets branch_targets;
    727   FindBranchTargets(*instructions, &branch_targets);
    728   TargetsToBlocks all_blocks;
    729   BasicBlock *first_block =
    730     CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
    731   MergeTails(&all_blocks);
    732   BasicBlocks basic_blocks;
    733   TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
    734   ComputeRelativeJumps(&basic_blocks, all_blocks);
    735   ConcatenateBasicBlocks(basic_blocks, program);
    736   return;
    737 }
    738 
    739 }  // namespace
    740