// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdio.h>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/codegen.h"

namespace {

// Helper function for Traverse(). Depth-first walk of the instruction DAG
// rooted at "instruction"; inserts every reachable instruction into
// "visited" exactly once. Conditional jumps follow both branch pointers,
// "always" jumps (BPF_JA) only use jt_ptr, and BPF_RET ends a path.
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        // Only conditional jumps have a "false" branch.
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        // Return instructions have no successor.
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

// The code generator owns every Instruction and BasicBlock it ever handed
// out (they are recorded in instructions_ / basic_blocks_); free them all.
CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

// Debugging aid: pretty-prints a fully compiled BPF program to stderr,
// one annotated line per instruction. Jump targets are printed as absolute
// instruction numbers (relative offset + current position + 1).
void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d // ", (int)iter->k);
          // Decode well-known load offsets into the seccomp data structure.
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            // Arguments are 64bit values read as two aligned 32bit words;
            // offset % 8 distinguishes the LSB half from the MSB half.
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
                  BPF_OP(iter->code) == BPF_JSET ? "&" :
                  BPF_OP(iter->code) == BPF_JEQ ? "==" :
                  BPF_OP(iter->code) == BPF_JGE ? ">=" :
                  BPF_OP(iter->code) == BPF_JGT ? ">" : "???",
                  (int)iter->k,
                  ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        // For BPF_NEG the format string consumes no arguments; the extra
        // fprintf() arguments are then simply ignored.
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
                            ? "A := -A\n" : "A := A %s 0x%x\n",
                BPF_OP(iter->code) == BPF_ADD ? "+" :
                BPF_OP(iter->code) == BPF_SUB ? "-" :
                BPF_OP(iter->code) == BPF_MUL ? "*" :
                BPF_OP(iter->code) == BPF_DIV ? "/" :
                BPF_OP(iter->code) == BPF_MOD ? "%" :
                BPF_OP(iter->code) == BPF_OR ? "|" :
                BPF_OP(iter->code) == BPF_XOR ? "^" :
                BPF_OP(iter->code) == BPF_AND ? "&" :
                BPF_OP(iter->code) == BPF_LSH ? "<<" :
                BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
                (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
  return;
}

// Creates a non-jumping instruction or an unconditional ("always") jump,
// followed by exactly one "next" instruction. The returned instruction is
// owned by this CodeGen.
Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "joinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target, only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

// Creates a BPF_RET instruction returning the value encoded in "err".
Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
  if (BPF_CLASS(code) != BPF_RET) {
    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
  }
  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
      err.error_type_ != ErrorCode::ET_TRAP) {
    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
  }
  return MakeInstruction(code, err.err_);
}

// Creates a conditional jump instruction comparing the accumulator against
// "k", branching to "jt" when true and "jf" when false. Exactly one of the
// two targets may be deferred (set later via JoinInstructions()).
Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt && !jf) {
    // We allow callers to defer specifying exactly one of the branch
    // targets. It must then be set later by calling "JoinInstructions".
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}

void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
  // Merge two instructions, or set the branch target for an "always" jump.
  // This function should be called, if the caller didn't initially provide
  // a value for "next" when creating the instruction.
  if (BPF_CLASS(head->code) == BPF_JMP) {
    if (BPF_OP(head->code) == BPF_JA) {
      if (head->jt_ptr) {
        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
      }
      head->jt_ptr = tail;
    } else {
      // For conditional jumps, fill in whichever branch target was left
      // unset; it is an error if both or neither are still pending.
      if (!head->jt_ptr && head->jf_ptr) {
        head->jt_ptr = tail;
      } else if (!head->jf_ptr && head->jt_ptr) {
        head->jf_ptr = tail;
      } else {
        SANDBOX_DIE("Cannot append instructions after a jump");
      }
    }
  } else if (BPF_CLASS(head->code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  } else if (head->next) {
    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
  } else {
    head->next = tail;
  }
  return;
}

// Calls "fnc(insn, aux)" once for every instruction reachable from
// "instruction". N.B. iteration happens in std::set (pointer) order, not
// in program order.
void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive decent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction.
      // Sanity check: exactly the BPF_RET instructions must lack a "next"
      // pointer (i.e. "!insn->next" XOR "is a return" is an error).
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
  return;
}

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    // Jumps may only ever appear as the very last instruction of a block.
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  // Ownership stays with the CodeGen; freed in the destructor.
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  // Sanity check: only the first block we ever create (while "basic_blocks"
  // is still empty) may start at an instruction with no incoming jumps.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
  return;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means,
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction, this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        // NOTE: jumps into the middle of the while loop above; this is the
        // shared backtracking code path for both loop exits.
        goto backtracking;
      }
    }
  }
  return first_block;
}

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal code
      // and k fields) and their last instructions are neither a JMP nor a RET
      // (which is the only way we can reach this point), then we must compare
      // their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    // NOTE(review): "k" is a 32bit field; the unsigned difference is
    // converted to int here, so the sign of the result for very distant
    // values is not the same as a true three-way comparison. Any non-zero
    // value still signals inequality, though — confirm callers only rely
    // on a consistent (not numerically ordered) result.
    if (insn1.k != insn2.k) {
      return insn1.k - insn2.k;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by definition
    // it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields. Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions to map to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  //   a basic block. In practice, this is sufficiently rare that we don't
  //   incur a big cost.
  //   Similarly, we currently don't merge anything other than tails. In
  //   the future, we might decide to revisit this decision and attempt to
  //   merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    // Successors are determined by the block's final instruction: both
    // targets of a conditional jump, one target of an "always" jump, the
    // fall-through "next" otherwise; a RET has no successors.
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
  // ends up making our descendants eligible for the sorted list. The
  // sorting algorithm terminates when there are no more basic blocks that have
  // no incoming branches. If we didn't move all blocks from the set of
  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
  // been a cyclic dependency. This should never happen in a BPF program, as
  // well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect last instruction in the basic block. This is typically either a
    // jump or a return statement. But it could also be a "normal" instruction
    // that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into set of "head" nodes that no longer have any incoming branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        // The "true" branch target just became eligible; greedily emit it
        // next so the jump tends to fall through.
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means, iterating over the basic blocks in reverse makes
  // it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32bit "k" field for the offset, instead
        // of the 8bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          // Append a BPF_JA for the "true" branch; the conditional jump
          // then targets it at distance 0.
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          // Insert a BPF_JA for the "false" branch between the conditional
          // jump and any "true"-branch BPF_JA added above ("jmp" remembers
          // the insertion point from before that push_back).
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
  return;
}

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
  return;
}

// Compiles the instruction graph rooted at "instructions" into a loadable
// BPF program: find branch targets, cut the graph into basic blocks, merge
// identical tails, topologically sort the blocks, resolve relative jump
// offsets, and finally concatenate everything into "program". Each CodeGen
// instance may only compile once.
void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
  return;
}

}  // namespace sandbox