// sandbox/linux/bpf_dsl/codegen.cc
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "sandbox/linux/bpf_dsl/codegen.h"
      6 
      7 #include <stddef.h>
      8 #include <stdint.h>
      9 
     10 #include <limits>
     11 #include <utility>
     12 
     13 #include "base/logging.h"
     14 #include "sandbox/linux/system_headers/linux_filter.h"
     15 
     16 // This CodeGen implementation strives for simplicity while still
     17 // generating acceptable BPF programs under typical usage patterns
     18 // (e.g., by PolicyCompiler).
     19 //
     20 // The key to its simplicity is that BPF programs only support forward
     21 // jumps/branches, which allows constraining the DAG construction API
     22 // to make instruction nodes immutable. Immutable nodes admits a
     23 // simple greedy approach of emitting new instructions as needed and
     24 // then reusing existing ones that have already been emitted. This
     25 // cleanly avoids any need to compute basic blocks or apply
     26 // topological sorting because the API effectively sorts instructions
     27 // for us (e.g., before MakeInstruction() can be called to emit a
     28 // branch instruction, it must have already been called for each
     29 // branch path).
     30 //
     31 // This greedy algorithm is not without (theoretical) weakness though:
     32 //
     33 //   1. In the general case, we don't eliminate dead code.  If needed,
     34 //      we could trace back through the program in Compile() and elide
     35 //      any unneeded instructions, but in practice we only emit live
     36 //      instructions anyway.
     37 //
     38 //   2. By not dividing instructions into basic blocks and sorting, we
     39 //      lose an opportunity to move non-branch/non-return instructions
     40 //      adjacent to their successor instructions, which means we might
     41 //      need to emit additional jumps. But in practice, they'll
     42 //      already be nearby as long as callers don't go out of their way
     43 //      to interleave MakeInstruction() calls for unrelated code
     44 //      sequences.
     45 
     46 namespace sandbox {
     47 
// kBranchRange is the maximum value that can be stored in
// sock_filter's 8-bit jt and jf fields.
const size_t kBranchRange = std::numeric_limits<uint8_t>::max();

// Out-of-line definition for the class-scope constant declared in
// codegen.h. Used below as the "no node" sentinel for jt/jf operands
// of non-branch instructions (value presumably set in the header —
// not visible here).
const CodeGen::Node CodeGen::kNullNode;
     53 
     54 CodeGen::CodeGen() : program_(), equivalent_(), memos_() {
     55 }
     56 
     57 CodeGen::~CodeGen() {
     58 }
     59 
     60 CodeGen::Program CodeGen::Compile(CodeGen::Node head) {
     61   return Program(program_.rbegin() + Offset(head), program_.rend());
     62 }
     63 
     64 CodeGen::Node CodeGen::MakeInstruction(uint16_t code,
     65                                        uint32_t k,
     66                                        Node jt,
     67                                        Node jf) {
     68   // To avoid generating redundant code sequences, we memoize the
     69   // results from AppendInstruction().
     70   auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode));
     71   CodeGen::Node* node = &res.first->second;
     72   if (res.second) {  // Newly inserted memo entry.
     73     *node = AppendInstruction(code, k, jt, jf);
     74   }
     75   return *node;
     76 }
     77 
     78 CodeGen::Node CodeGen::AppendInstruction(uint16_t code,
     79                                          uint32_t k,
     80                                          Node jt,
     81                                          Node jf) {
     82   if (BPF_CLASS(code) == BPF_JMP) {
     83     CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";
     84 
     85     // Optimally adding jumps is rather tricky, so we use a quick
     86     // approximation: by artificially reducing |jt|'s range, |jt| will
     87     // stay within its true range even if we add a jump for |jf|.
     88     jt = WithinRange(jt, kBranchRange - 1);
     89     jf = WithinRange(jf, kBranchRange);
     90     return Append(code, k, Offset(jt), Offset(jf));
     91   }
     92 
     93   CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
     94   if (BPF_CLASS(code) == BPF_RET) {
     95     CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
     96   } else {
     97     // For non-branch/non-return instructions, execution always
     98     // proceeds to the next instruction; so we need to arrange for
     99     // that to be |jt|.
    100     jt = WithinRange(jt, 0);
    101     CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction";
    102   }
    103   return Append(code, k, 0, 0);
    104 }
    105 
    106 CodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
    107   // Just use |target| if it's already within range.
    108   if (Offset(target) <= range) {
    109     return target;
    110   }
    111 
    112   // Alternatively, look for an equivalent instruction within range.
    113   if (Offset(equivalent_.at(target)) <= range) {
    114     return equivalent_.at(target);
    115   }
    116 
    117   // Otherwise, fall back to emitting a jump instruction.
    118   Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
    119   equivalent_.at(target) = jump;
    120   return jump;
    121 }
    122 
    123 CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
    124   if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    125     CHECK_LE(jt, kBranchRange);
    126     CHECK_LE(jf, kBranchRange);
    127   } else {
    128     CHECK_EQ(0U, jt);
    129     CHECK_EQ(0U, jf);
    130   }
    131 
    132   CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
    133   CHECK_EQ(program_.size(), equivalent_.size());
    134 
    135   Node res = program_.size();
    136   program_.push_back(sock_filter{
    137       code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k});
    138   equivalent_.push_back(res);
    139   return res;
    140 }
    141 
    142 size_t CodeGen::Offset(Node target) const {
    143   CHECK_LT(target, program_.size()) << "Bogus offset target node";
    144   return (program_.size() - 1) - target;
    145 }
    146 
    147 // TODO(mdempsky): Move into a general base::Tuple helper library.
    148 bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs,
    149                                       const MemoKey& rhs) const {
    150   if (base::get<0>(lhs) != base::get<0>(rhs))
    151     return base::get<0>(lhs) < base::get<0>(rhs);
    152   if (base::get<1>(lhs) != base::get<1>(rhs))
    153     return base::get<1>(lhs) < base::get<1>(rhs);
    154   if (base::get<2>(lhs) != base::get<2>(rhs))
    155     return base::get<2>(lhs) < base::get<2>(rhs);
    156   if (base::get<3>(lhs) != base::get<3>(rhs))
    157     return base::get<3>(lhs) < base::get<3>(rhs);
    158   return false;
    159 }
    160 
    161 }  // namespace sandbox
    162