// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdio.h>

#include <set>

#include "sandbox/linux/seccomp-bpf/codegen.h"

namespace {

// Helper function for Traverse().
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
                  BPF_OP(iter->code) == BPF_JSET ? "&" :
                  BPF_OP(iter->code) == BPF_JEQ ? "==" :
                  BPF_OP(iter->code) == BPF_JGE ? ">=" :
                  BPF_OP(iter->code) == BPF_JGT ? ">" : "???",
                  (int)iter->k,
                  ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
                            ? "A := -A\n" : "A := A %s 0x%x\n",
                BPF_OP(iter->code) == BPF_ADD ? "+" :
                BPF_OP(iter->code) == BPF_SUB ? "-" :
                BPF_OP(iter->code) == BPF_MUL ? "*" :
                BPF_OP(iter->code) == BPF_DIV ? "/" :
                BPF_OP(iter->code) == BPF_MOD ? "%" :
                BPF_OP(iter->code) == BPF_OR ? "|" :
                BPF_OP(iter->code) == BPF_XOR ? "^" :
                BPF_OP(iter->code) == BPF_AND ? "&" :
                BPF_OP(iter->code) == BPF_LSH ? "<<" :
                BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
                (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
  return;
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "JoinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target, only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
  if (BPF_CLASS(code) != BPF_RET) {
    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
  }
  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
      err.error_type_ != ErrorCode::ET_TRAP) {
    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
  }
  return MakeInstruction(code, err.err_);
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt && !jf) {
    // We allow callers to defer specifying exactly one of the branch
    // targets. It must then be set later by calling "JoinInstructions".
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}

void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
  // Merge two instructions, or set the branch target for an "always" jump.
  // This function should be called if the caller didn't initially provide
  // a value for "next" when creating the instruction.
  if (BPF_CLASS(head->code) == BPF_JMP) {
    if (BPF_OP(head->code) == BPF_JA) {
      if (head->jt_ptr) {
        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
      }
      head->jt_ptr = tail;
    } else {
      if (!head->jt_ptr && head->jf_ptr) {
        head->jt_ptr = tail;
      } else if (!head->jf_ptr && head->jt_ptr) {
        head->jf_ptr = tail;
      } else {
        SANDBOX_DIE("Cannot append instructions after a jump");
      }
    }
  } else if (BPF_CLASS(head->code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  } else if (head->next) {
    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
  } else {
    head->next = tail;
  }
  return;
}
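
// Usage sketch for the builders above (hypothetical values, not taken from
// the real sandbox compiler). Instructions are typically created back to
// front, so each call can already name its successor; "load the system call
// number, then allow" becomes:
//
//   CodeGen gen;
//   Instruction* ret = gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
//   Instruction* load = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS, offsetof(struct arch_seccomp_data, nr), ret);
//
// Alternatively, "next" can be deferred and patched in later via
// JoinInstructions():
//
//   Instruction* load = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS, offsetof(struct arch_seccomp_data, nr));
//   gen.JoinInstructions(load, ret);  // Equivalent to passing "ret" above.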

void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase the count of incoming edges for each of the
      // jump targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
  return;
}

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
  return;
}
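
// Worked sketch for the functions above and below (hypothetical
// three-instruction program):
//
//   LOAD nr --> JEQ --true--> RET ALLOW
//                   --false-> RET TRAP
//
// FindBranchTargets() records one incoming edge for each of the two RET
// instructions. Both RETs are therefore branch targets, and
// CutGraphIntoBasicBlocks() below cuts the graph into three basic blocks:
// {LOAD, JEQ}, {RET ALLOW}, and {RET TRAP}.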

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction; this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction; append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}
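
// Example of the duplication that the next two functions remove
// (hypothetical blocks): if two basic blocks each consist of the single
// instruction "RET <trap value>" and agree on all jump targets, the
// comparator below reports them as equal, and MergeTails() further down
// collapses them into one block, shrinking the emitted program.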

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end()) {
      return iter2 == insns2.end() ? 0 : -1;
    } else if (iter2 == insns2.end()) {
      return 1;
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code == insn2.code) {
      if (insn1.k == insn2.k) {
        // Only conditional jump instructions use the jt_ptr and jf_ptr
        // fields.
        if (BPF_CLASS(insn1.code) == BPF_JMP) {
          if (BPF_OP(insn1.code) != BPF_JA) {
            // Recursively compare the "true" and "false" branches.
            // A well-formed BPF program can't have any cycles, so we know
            // that our recursive algorithm will ultimately terminate.
            // In the unlikely event that the programmer made a mistake and
            // went out of the way to give us a cyclic program, we will crash
            // with a stack overflow. We are OK with that.
            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
                                   blocks.find(insn2.jt_ptr)->second,
                                   blocks);
            if (c == 0) {
              c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                                 blocks.find(insn2.jf_ptr)->second,
                                 blocks);
              if (c == 0) {
                continue;
              } else {
                return c;
              }
            } else {
              return c;
            }
          } else {
            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
                                   blocks.find(insn2.jt_ptr)->second,
                                   blocks);
            if (c == 0) {
              continue;
            } else {
              return c;
            }
          }
        } else {
          continue;
        }
      } else {
        // "k" is unsigned; compare explicitly instead of subtracting, as the
        // difference could overflow an int and yield an inconsistent
        // ordering.
        return insn1.k < insn2.k ? -1 : 1;
      }
    } else {
      return insn1.code - insn2.code;
    }
  }
}

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions mapping to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  // a basic block. In practice, this is sufficiently rare that we don't
  // incur a big cost.
  // Similarly, we currently don't merge anything other than tails. In
  // the future, we might decide to revisit this decision and attempt to
  // merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}
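
// Sketch for the branch counting below (hypothetical diamond-shaped graph):
// if block A jumps to blocks B and C, and both B and C continue into block
// D, then D ends up with an incoming-branch count of 2. The topological
// sort may only emit D once both B and C have been emitted and have each
// decremented that count back to zero.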

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}
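
// Sketch of the fall-through fix-up performed by the sort below
// (hypothetical blocks): if block X ends in a non-jump instruction whose
// successor block Y cannot be placed immediately after X, the sort appends
// an explicit "JMP Y" (BPF_JA) to X, since BPF code cannot fall through
// into a non-adjacent block.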

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This
  // potentially ends up making our descendants eligible for the sorted list.
  // The sorting algorithm terminates when there are no more basic blocks
  // that have no incoming branches. If we didn't move all blocks from the
  // set of "unordered_blocks" to the sorted list of "basic_blocks", there
  // must have been a cyclic dependency. This should never happen in a BPF
  // program, as well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move the block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect the last instruction in the basic block. This is typically
    // either a jump or a return statement. But it could also be a "normal"
    // instruction that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}
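
// Sketch of the encoding constraint handled below: conditional BPF jumps
// store their targets in the 8-bit "jt" and "jf" fields and can therefore
// skip at most 255 instructions, while an unconditional BPF_JA carries a
// 32-bit offset in "k". A hypothetical "jt" of 300 thus becomes "jt = 0"
// pointing at a freshly inserted BPF_JA whose "k" is 300.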

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means that iterating over the basic blocks in reverse
  // makes it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // The basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32-bit "k" field for the offset, instead
        // of the 8-bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication: conditional relative jumps can
        // jump at most 255 instructions forward. If we have to jump further,
        // we must insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to
          // adjust the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to the next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
  return;
}

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
  return;
}

void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
  return;
}

}  // namespace sandbox