Home | History | Annotate | Download | only in bpf_dsl
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "sandbox/linux/bpf_dsl/codegen.h"
      6 
      7 #include <stddef.h>
      8 #include <stdint.h>
      9 
     10 #include <map>
     11 #include <utility>
     12 #include <vector>
     13 
     14 #include "base/macros.h"
     15 #include "base/md5.h"
     16 #include "base/strings/string_piece.h"
     17 #include "sandbox/linux/system_headers/linux_filter.h"
     18 #include "testing/gtest/include/gtest/gtest.h"
     19 
     20 namespace sandbox {
     21 namespace {
     22 
     23 // Hash provides an abstraction for building "hash trees" from BPF
     24 // control flow graphs, and efficiently identifying equivalent graphs.
     25 //
     26 // For simplicity, we use MD5, because base happens to provide a
     27 // convenient API for its use. However, any collision-resistant hash
     28 // should suffice.
     29 class Hash {
     30  public:
     31   static const Hash kZero;
     32 
     33   Hash() : digest_() {}
     34 
     35   Hash(uint16_t code,
     36        uint32_t k,
     37        const Hash& jt = kZero,
     38        const Hash& jf = kZero)
     39       : digest_() {
     40     base::MD5Context ctx;
     41     base::MD5Init(&ctx);
     42     HashValue(&ctx, code);
     43     HashValue(&ctx, k);
     44     HashValue(&ctx, jt);
     45     HashValue(&ctx, jf);
     46     base::MD5Final(&digest_, &ctx);
     47   }
     48 
     49   Hash(const Hash& hash) = default;
     50   Hash& operator=(const Hash& rhs) = default;
     51 
     52   friend bool operator==(const Hash& lhs, const Hash& rhs) {
     53     return lhs.Base16() == rhs.Base16();
     54   }
     55   friend bool operator!=(const Hash& lhs, const Hash& rhs) {
     56     return !(lhs == rhs);
     57   }
     58 
     59  private:
     60   template <typename T>
     61   void HashValue(base::MD5Context* ctx, const T& value) {
     62     base::MD5Update(ctx,
     63                     base::StringPiece(reinterpret_cast<const char*>(&value),
     64                                       sizeof(value)));
     65   }
     66 
     67   std::string Base16() const {
     68     return base::MD5DigestToBase16(digest_);
     69   }
     70 
     71   base::MD5Digest digest_;
     72 };
     73 
     74 const Hash Hash::kZero;
     75 
     76 // Sanity check that equality and inequality work on Hash as required.
     77 TEST(CodeGen, HashSanity) {
     78   std::vector<Hash> hashes;
     79 
     80   // Push a bunch of logically distinct hashes.
     81   hashes.push_back(Hash::kZero);
     82   for (int i = 0; i < 4; ++i) {
     83     hashes.push_back(Hash(i & 1, i & 2));
     84   }
     85   for (int i = 0; i < 16; ++i) {
     86     hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8)));
     87   }
     88   for (int i = 0; i < 64; ++i) {
     89     hashes.push_back(
     90         Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32)));
     91   }
     92 
     93   for (const Hash& a : hashes) {
     94     for (const Hash& b : hashes) {
     95       // Hashes should equal themselves, but not equal all others.
     96       if (&a == &b) {
     97         EXPECT_EQ(a, b);
     98       } else {
     99         EXPECT_NE(a, b);
    100       }
    101     }
    102   }
    103 }
    104 
    105 // ProgramTest provides a fixture for writing compiling sample
    106 // programs with CodeGen and verifying the linearized output matches
    107 // the input DAG.
    108 class ProgramTest : public ::testing::Test {
    109  protected:
    110   ProgramTest() : gen_(), node_hashes_() {}
    111 
    112   // MakeInstruction calls CodeGen::MakeInstruction() and associated
    113   // the returned address with a hash of the instruction.
    114   CodeGen::Node MakeInstruction(uint16_t code,
    115                                 uint32_t k,
    116                                 CodeGen::Node jt = CodeGen::kNullNode,
    117                                 CodeGen::Node jf = CodeGen::kNullNode) {
    118     CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);
    119     EXPECT_NE(CodeGen::kNullNode, res);
    120 
    121     Hash digest(code, k, Lookup(jt), Lookup(jf));
    122     auto it = node_hashes_.insert(std::make_pair(res, digest));
    123     EXPECT_EQ(digest, it.first->second);
    124 
    125     return res;
    126   }
    127 
    128   // RunTest compiles the program and verifies that the output matches
    129   // what is expected.  It should be called at the end of each program
    130   // test case.
    131   void RunTest(CodeGen::Node head) {
    132     // Compile the program
    133     CodeGen::Program program = gen_.Compile(head);
    134 
    135     // Walk the program backwards, and compute the hash for each instruction.
    136     std::vector<Hash> prog_hashes(program.size());
    137     for (size_t i = program.size(); i > 0; --i) {
    138       const sock_filter& insn = program.at(i - 1);
    139       Hash& hash = prog_hashes.at(i - 1);
    140 
    141       if (BPF_CLASS(insn.code) == BPF_JMP) {
    142         if (BPF_OP(insn.code) == BPF_JA) {
    143           // The compiler adds JA instructions as needed, so skip them.
    144           hash = prog_hashes.at(i + insn.k);
    145         } else {
    146           hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),
    147                       prog_hashes.at(i + insn.jf));
    148         }
    149       } else if (BPF_CLASS(insn.code) == BPF_RET) {
    150         hash = Hash(insn.code, insn.k);
    151       } else {
    152         hash = Hash(insn.code, insn.k, prog_hashes.at(i));
    153       }
    154     }
    155 
    156     EXPECT_EQ(Lookup(head), prog_hashes.at(0));
    157   }
    158 
    159  private:
    160   const Hash& Lookup(CodeGen::Node next) const {
    161     if (next == CodeGen::kNullNode) {
    162       return Hash::kZero;
    163     }
    164     auto it = node_hashes_.find(next);
    165     if (it == node_hashes_.end()) {
    166       ADD_FAILURE() << "No hash found for node " << next;
    167       return Hash::kZero;
    168     }
    169     return it->second;
    170   }
    171 
    172   CodeGen gen_;
    173   std::map<CodeGen::Node, Hash> node_hashes_;
    174 
    175   DISALLOW_COPY_AND_ASSIGN(ProgramTest);
    176 };
    177 
    178 TEST_F(ProgramTest, OneInstruction) {
    179   // Create the most basic valid BPF program:
    180   //    RET 0
    181   CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0);
    182   RunTest(head);
    183 }
    184 
    185 TEST_F(ProgramTest, SimpleBranch) {
    186   // Create a program with a single branch:
    187   //    JUMP if eq 42 then $0 else $1
    188   // 0: RET 1
    189   // 1: RET 0
    190   CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,
    191                                        MakeInstruction(BPF_RET + BPF_K, 1),
    192                                        MakeInstruction(BPF_RET + BPF_K, 0));
    193   RunTest(head);
    194 }
    195 
    196 TEST_F(ProgramTest, AtypicalBranch) {
    197   // Create a program with a single branch:
    198   //    JUMP if eq 42 then $0 else $0
    199   // 0: RET 0
    200 
    201   CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0);
    202   CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);
    203 
    204   // N.B.: As the instructions in both sides of the branch are already
    205   //       the same object, we do not actually have any "mergeable" branches.
    206   //       This needs to be reflected in our choice of "flags".
    207   RunTest(head);
    208 }
    209 
    210 TEST_F(ProgramTest, Complex) {
    211   // Creates a basic BPF program that we'll use to test some of the code:
    212   //    JUMP if eq 42 the $0 else $1     (insn6)
    213   // 0: LD 23                            (insn5)
    214   // 1: JUMP if eq 42 then $2 else $4    (insn4)
    215   // 2: JUMP to $3                       (insn2)
    216   // 3: LD 42                            (insn1)
    217   //    RET 42                           (insn0)
    218   // 4: LD 42                            (insn3)
    219   //    RET 42                           (insn3+)
    220   CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42);
    221   CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);
    222   CodeGen::Node insn2 = insn1;  // Implicit JUMP
    223 
    224   // We explicitly duplicate instructions to test that they're merged.
    225   CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42,
    226                                         MakeInstruction(BPF_RET + BPF_K, 42));
    227   EXPECT_EQ(insn2, insn3);
    228 
    229   CodeGen::Node insn4 =
    230       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);
    231   CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);
    232 
    233   // Force a basic block that ends in neither a jump instruction nor a return
    234   // instruction. It only contains "insn5". This exercises one of the less
    235   // common code paths in the topo-sort algorithm.
    236   // This also gives us a diamond-shaped pattern in our graph, which stresses
    237   // another aspect of the topo-sort algorithm (namely, the ability to
    238   // correctly count the incoming branches for subtrees that are not disjunct).
    239   CodeGen::Node insn6 =
    240       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);
    241 
    242   RunTest(insn6);
    243 }
    244 
    245 TEST_F(ProgramTest, ConfusingTails) {
    246   // This simple program demonstrates https://crbug.com/351103/
    247   // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
    248   // be tempted to merge them since they are the same. However, they are
    249   // not mergeable because they fall-through to non semantically equivalent
    250   // blocks.
    251   // Without the fix for this bug, this program should trigger the check in
    252   // CompileAndCompare: the serialized graphs from the program and its compiled
    253   // version will differ.
    254   //
    255   //  0) LOAD 1  // ???
    256   //  1) if A == 0x1; then JMP 2 else JMP 3
    257   //  2) LOAD 0  // System call number
    258   //  3) if A == 0x2; then JMP 4 else JMP 5
    259   //  4) LOAD 0  // System call number
    260   //  5) if A == 0x1; then JMP 6 else JMP 7
    261   //  6) RET 0
    262   //  7) RET 1
    263 
    264   CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
    265   CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
    266   CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
    267   CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
    268   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
    269   CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
    270   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
    271   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
    272 
    273   RunTest(i0);
    274 }
    275 
    276 TEST_F(ProgramTest, ConfusingTailsBasic) {
    277   // Without the fix for https://crbug.com/351103/, (see
    278   // SampleProgramConfusingTails()), this would generate a cyclic graph and
    279   // crash as the two "LOAD 0" instructions would get merged.
    280   //
    281   // 0) LOAD 1  // ???
    282   // 1) if A == 0x1; then JMP 2 else JMP 3
    283   // 2) LOAD 0  // System call number
    284   // 3) if A == 0x2; then JMP 4 else JMP 5
    285   // 4) LOAD 0  // System call number
    286   // 5) RET 1
    287 
    288   CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1);
    289   CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
    290   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
    291   CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
    292   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
    293   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
    294 
    295   RunTest(i0);
    296 }
    297 
    298 TEST_F(ProgramTest, ConfusingTailsMergeable) {
    299   // This is similar to SampleProgramConfusingTails(), except that
    300   // instructions 2 and 4 are now RET instructions.
    301   // In PointerCompare(), this exercises the path where two blocks are of the
    302   // same length and identical and the last instruction is a JMP or RET, so the
    303   // following blocks don't need to be looked at and the blocks are mergeable.
    304   //
    305   // 0) LOAD 1  // ???
    306   // 1) if A == 0x1; then JMP 2 else JMP 3
    307   // 2) RET 42
    308   // 3) if A == 0x2; then JMP 4 else JMP 5
    309   // 4) RET 42
    310   // 5) if A == 0x1; then JMP 6 else JMP 7
    311   // 6) RET 0
    312   // 7) RET 1
    313 
    314   CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
    315   CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
    316   CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
    317   CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42);
    318   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
    319   CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42);
    320   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
    321   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
    322 
    323   RunTest(i0);
    324 }
    325 
    326 TEST_F(ProgramTest, InstructionFolding) {
    327   // Check that simple instructions are folded as expected.
    328   CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0);
    329   EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
    330   CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1);
    331   EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
    332   EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
    333   EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
    334 
    335   // Check that complex sequences are folded too.
    336   CodeGen::Node c =
    337       MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,
    338                       MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b));
    339   EXPECT_EQ(c, MakeInstruction(
    340                    BPF_LD + BPF_W + BPF_ABS, 0,
    341                    MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)));
    342 
    343   RunTest(c);
    344 }
    345 
    346 TEST_F(ProgramTest, FarBranches) {
    347   // BPF instructions use 8-bit fields for branch offsets, which means
    348   // branch targets must be within 255 instructions of the branch
    349   // instruction. CodeGen abstracts away this detail by inserting jump
    350   // instructions as needed, which we test here by generating programs
    351   // that should trigger any interesting boundary conditions.
    352 
    353   // Populate with 260 initial instruction nodes.
    354   std::vector<CodeGen::Node> nodes;
    355   nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
    356   for (size_t i = 1; i < 260; ++i) {
    357     nodes.push_back(
    358         MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
    359   }
    360 
    361   // Exhaustively test branch offsets near BPF's limits.
    362   for (size_t jt = 250; jt < 260; ++jt) {
    363     for (size_t jf = 250; jf < 260; ++jf) {
    364       nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0,
    365                                       nodes.rbegin()[jt], nodes.rbegin()[jf]));
    366       RunTest(nodes.back());
    367     }
    368   }
    369 }
    370 
    371 TEST_F(ProgramTest, JumpReuse) {
    372   // As a code size optimization, we try to reuse jumps when possible
    373   // instead of emitting new ones. Here we make sure that optimization
    374   // is working as intended.
    375   //
    376   // NOTE: To simplify testing, we rely on implementation details
    377   // about what CodeGen::Node values indicate (i.e., vector indices),
    378   // but CodeGen users should treat them as opaque values.
    379 
    380   // Populate with 260 initial instruction nodes.
    381   std::vector<CodeGen::Node> nodes;
    382   nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
    383   for (size_t i = 1; i < 260; ++i) {
    384     nodes.push_back(
    385         MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
    386   }
    387 
    388   // Branching to nodes[0] and nodes[1] should require 3 new
    389   // instructions: two far jumps plus the branch itself.
    390   CodeGen::Node one =
    391       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]);
    392   EXPECT_EQ(nodes.back() + 3, one);  // XXX: Implementation detail!
    393   RunTest(one);
    394 
    395   // Branching again to the same target nodes should require only one
    396   // new instruction, as we can reuse the previous branch's jumps.
    397   CodeGen::Node two =
    398       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]);
    399   EXPECT_EQ(one + 1, two);  // XXX: Implementation detail!
    400   RunTest(two);
    401 }
    402 
    403 }  // namespace
    404 }  // namespace sandbox
    405