// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdio.h>

#include <set>

#include "sandbox/linux/seccomp-bpf/codegen.h"

namespace {

// Helper function for Traverse(). Performs a depth-first walk of the
// instruction graph and adds every instruction reachable from
// "instruction" to "visited" exactly once.
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  // The code generator owns all Instruction and BasicBlock objects that it
  // ever created; release them now.
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d  // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
              BPF_OP(iter->code) == BPF_JSET ? "&" :
              BPF_OP(iter->code) == BPF_JEQ ? "==" :
              BPF_OP(iter->code) == BPF_JGE ? ">=" :
              BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
              (int)iter->k,
              ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x  // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
            ? "A := -A\n" : "A := A %s 0x%x\n",
            BPF_OP(iter->code) == BPF_ADD ? "+"  :
            BPF_OP(iter->code) == BPF_SUB ? "-"  :
            BPF_OP(iter->code) == BPF_MUL ? "*"  :
            BPF_OP(iter->code) == BPF_DIV ? "/"  :
            BPF_OP(iter->code) == BPF_MOD ? "%"  :
            BPF_OP(iter->code) == BPF_OR  ? "|"  :
            BPF_OP(iter->code) == BPF_XOR ? "^"  :
            BPF_OP(iter->code) == BPF_AND ? "&"  :
            BPF_OP(iter->code) == BPF_LSH ? "<<" :
            BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
            (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
}
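
// Sample output (illustrative only; exact values depend on the compiled
// policy):
//
//     0) LOAD 4  // Architecture
//     1) if A == 0xc000003e; then JMP 2 else JMP 3
//     2) RET 0x7fff0000  // Allowed
//     3) RET 0x30000  // Trap #0
//
// 0xc000003e is AUDIT_ARCH_X86_64; SECCOMP_RET_ALLOW is 0x7fff0000 and
// SECCOMP_RET_TRAP is 0x30000 on Linux.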

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of them
  // are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but they must then call
  // JoinInstructions() later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use only the "true" branch target.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}
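
// Usage sketch (illustrative; assumes the overloads declared in codegen.h,
// where "next" may be omitted): build a load followed by a return, deferring
// the load's "next" pointer and joining it afterwards.
//
//   CodeGen gen;
//   Instruction* ret = gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
//   Instruction* load = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS, offsetof(struct arch_seccomp_data, nr));
//   gen.JoinInstructions(load, ret);  // supply the deferred "next"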

Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
  if (BPF_CLASS(code) != BPF_RET) {
    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
  }
  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
      err.error_type_ != ErrorCode::ET_TRAP) {
    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
  }
  return MakeInstruction(code, err.err_);
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a "true"
  // and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt && !jf) {
    // Callers may defer exactly one of the two branch targets and set it
    // later by calling JoinInstructions(); they cannot defer both.
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}
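
// Illustrative use of the conditional form: "if A == k then allow else
// <deferred>", where the "false" branch is filled in later. "failure" is a
// placeholder for whatever instruction the policy branches to.
//
//   Instruction* allow = gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
//   Instruction* branch =
//       gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, k, allow, NULL);
//   gen.JoinInstructions(branch, failure);  // sets the missing jf_ptr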

void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
  // Merge two instructions, or set the branch target for an "always" jump.
  // This function should be called if the caller didn't initially provide
  // a value for "next" when creating the instruction.
  if (BPF_CLASS(head->code) == BPF_JMP) {
    if (BPF_OP(head->code) == BPF_JA) {
      if (head->jt_ptr) {
        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
      }
      head->jt_ptr = tail;
    } else {
      if (!head->jt_ptr && head->jf_ptr) {
        head->jt_ptr = tail;
      } else if (!head->jf_ptr && head->jt_ptr) {
        head->jf_ptr = tail;
      } else {
        SANDBOX_DIE("Cannot append instructions after a jump");
      }
    }
  } else if (BPF_CLASS(head->code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  } else if (head->next) {
    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
  } else {
    head->next = tail;
  }
}

void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  // Visit every instruction reachable from "instruction" and invoke "fnc"
  // on each of them exactly once. The visitation order is unspecified; it
  // follows the iteration order of a std::set of pointers.
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase the count of incoming edges for each of the
      // jump targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction ("next" must be NULL if and only if this is a return).
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
}
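
// Worked example (illustrative): for the instruction graph
//
//        LD
//        |
//       JEQ ---(false)---> RET #2
//        |
//      (true)
//        |
//       RET #1
//
// branch_targets maps RET #1 -> 1 and RET #2 -> 1. LD is not recorded,
// because it is only reached by falling into the program, not by a jump.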

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  // Sanity check: the only block whose head has no incoming jumps must be
  // the very first one, and "firstBlock" is unset exactly when no block has
  // been emitted yet.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction; this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}
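
// Continuing the example from FindBranchTargets() (illustrative): the graph
//
//   LD -> JEQ -> RET #1 (true) / RET #2 (false)
//
// is cut into three basic blocks: {LD, JEQ}, {RET #1}, and {RET #2}. The
// jump ends the first block, and each branch target starts a new one.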

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end()) {
      return iter2 == insns2.end() ? 0 : -1;
    } else if (iter2 == insns2.end()) {
      return 1;
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code == insn2.code) {
      if (insn1.k == insn2.k) {
        // Only conditional jump instructions use the jt_ptr and jf_ptr
        // fields.
        if (BPF_CLASS(insn1.code) == BPF_JMP) {
          if (BPF_OP(insn1.code) != BPF_JA) {
            // Recursively compare the "true" and "false" branches.
            // A well-formed BPF program can't have any cycles, so we know
            // that our recursive algorithm will ultimately terminate.
            // In the unlikely event that the programmer made a mistake and
            // went out of the way to give us a cyclic program, we will crash
            // with a stack overflow. We are OK with that.
            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
                                   blocks.find(insn2.jt_ptr)->second,
                                   blocks);
            if (c == 0) {
              c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                                 blocks.find(insn2.jf_ptr)->second,
                                 blocks);
              if (c == 0) {
                continue;
              } else {
                return c;
              }
            } else {
              return c;
            }
          } else {
            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
                                   blocks.find(insn2.jt_ptr)->second,
                                   blocks);
            if (c == 0) {
              continue;
            } else {
              return c;
            }
          }
        } else {
          continue;
        }
      } else {
        // "k" is unsigned; compare explicitly rather than subtracting, as
        // the subtraction could wrap and produce an inconsistent ordering.
        return insn1.k < insn2.k ? -1 : 1;
      }
    } else {
      return insn1.code - insn2.code;
    }
  }
}
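
// Example (illustrative): two blocks that each consist of the single
// instruction "RET SECCOMP_RET_ALLOW" compare equal here, which lets
// MergeTails() below collapse them into one shared block.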

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions mapping to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  //      a basic block. In practice, this is sufficiently rare that we don't
  //      incur a big cost.
  //      Similarly, we currently don't merge anything other than tails. In
  //      the future, we might decide to revisit this decision and attempt to
  //      merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}
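
// Note: the entry block always ends up with a count of exactly one. That
// count comes from the initial call in TopoSortBasicBlocks() rather than
// from a real jump (AddBasicBlock() guarantees that the first block has no
// incoming jumps); it acts as a phantom edge for entering the program.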

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort (essentially Kahn's algorithm,
  // with a greedy preference for placing a block's successor immediately
  // after it). We keep looking for basic blocks that don't have any incoming
  // branches (initially, this is just the "first_block") and add them to the
  // topologically sorted list of "basic_blocks". As we do so, we remove
  // outgoing branches. This potentially ends up making our descendants
  // eligible for the sorted list. The sorting algorithm terminates when
  // there are no more basic blocks that have no incoming branches. If we
  // didn't move all blocks from the set of "unordered_blocks" to the sorted
  // list of "basic_blocks", there must have been a cyclic dependency. This
  // should never happen in a BPF program, as well-formed BPF programs only
  // ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect the last instruction in the basic block. This is typically
    // either a jump or a return statement. But it could also be a "normal"
    // instruction that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means iterating over the basic blocks in reverse makes
  // it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32-bit "k" field for the offset, instead
        // of the 8-bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
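        // (The "jt == 255 && jf > 255" case below exists because inserting
        // a long jump for the "false" branch pushes the "true" distance
        // from 255 to 256, which no longer fits in the 8-bit field.)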
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to the next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
}
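
// How the offsets work (illustrative): "offset" counts instructions from
// the end of the program, and bb->offset is the distance from the start of
// "bb" to the end. With blocks laid out as
//
//   B0: {LD, JEQ}   B1: {RET #1}   B2: {RET #2}
//
// the reverse scan sets B2->offset = 1 and B1->offset = 2. When B0's JEQ is
// processed, "offset" is 2 (two instructions follow the jump), so a branch
// to B1 gets jt = 2 - 2 = 0 (fall through) and a branch to B2 gets
// jf = 2 - 1 = 1 (skip one instruction).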

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
}

void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
}
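
// End-to-end usage sketch (illustrative; assumes the MakeInstruction()
// overloads above and omits any real policy logic):
//
//   CodeGen gen;
//   Instruction* allow =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
//   Instruction* head = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS, offsetof(struct arch_seccomp_data, nr),
//       allow);
//   SandboxBPF::Program program;
//   gen.Compile(head, &program);  // "program" now holds raw sock_filter ops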

}  // namespace sandbox