Home | History | Annotate | Download | only in llvm-objdump
      1 //===-- MCFunction.cpp ----------------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the algorithm to break down a region of machine code
     11 // into basic blocks and try to reconstruct a CFG from it.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "MCFunction.h"
     16 #include "llvm/ADT/STLExtras.h"
     17 #include "llvm/MC/MCDisassembler.h"
     18 #include "llvm/MC/MCInst.h"
     19 #include "llvm/MC/MCInstPrinter.h"
     20 #include "llvm/MC/MCInstrAnalysis.h"
     21 #include "llvm/MC/MCInstrDesc.h"
     22 #include "llvm/MC/MCInstrInfo.h"
     23 #include "llvm/Support/MemoryObject.h"
     24 #include "llvm/Support/raw_ostream.h"
     25 #include "llvm/Support/system_error.h"
     26 #include <set>
     27 using namespace llvm;
     28 
     29 MCFunction
     30 MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
     31                                  const MemoryObject &Region, uint64_t Start,
     32                                  uint64_t End, const MCInstrAnalysis *Ana,
     33                                  raw_ostream &DebugOut,
     34                                  SmallVectorImpl<uint64_t> &Calls) {
     35   std::vector<MCDecodedInst> Instructions;
     36   std::set<uint64_t> Splits;
     37   Splits.insert(Start);
     38   uint64_t Size;
     39 
     40   MCFunction f(Name);
     41 
     42   {
     43   DenseSet<uint64_t> VisitedInsts;
     44   SmallVector<uint64_t, 16> WorkList;
     45   WorkList.push_back(Start);
     46   // Disassemble code and gather basic block split points.
     47   while (!WorkList.empty()) {
     48     uint64_t Index = WorkList.pop_back_val();
     49     if (VisitedInsts.find(Index) != VisitedInsts.end())
     50       continue; // Already visited this location.
     51 
     52     for (;Index < End; Index += Size) {
     53       VisitedInsts.insert(Index);
     54 
     55       MCInst Inst;
     56       if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
     57         Instructions.push_back(MCDecodedInst(Index, Size, Inst));
     58         if (Ana->isBranch(Inst)) {
     59           uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
     60           if (targ != -1ULL && targ == Index+Size)
     61             continue; // Skip nop jumps.
     62 
     63           // If we could determine the branch target, make a note to start a
     64           // new basic block there and add the target to the worklist.
     65           if (targ != -1ULL) {
     66             Splits.insert(targ);
     67             WorkList.push_back(targ);
     68             WorkList.push_back(Index+Size);
     69           }
     70           Splits.insert(Index+Size);
     71           break;
     72         } else if (Ana->isReturn(Inst)) {
     73           // Return instruction. This basic block ends here.
     74           Splits.insert(Index+Size);
     75           break;
     76         } else if (Ana->isCall(Inst)) {
     77           uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
     78           // Add the call to the call list if the destination is known.
     79           if (targ != -1ULL && targ != Index+Size)
     80             Calls.push_back(targ);
     81         }
     82       } else {
     83         errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
     84         if (Size == 0)
     85           Size = 1; // skip illegible bytes
     86       }
     87     }
     88   }
     89   }
     90 
     91   // Make sure the instruction list is sorted.
     92   std::sort(Instructions.begin(), Instructions.end());
     93 
     94   // Create basic blocks.
     95   unsigned ii = 0, ie = Instructions.size();
     96   for (std::set<uint64_t>::iterator spi = Splits.begin(),
     97        spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
     98     MCBasicBlock BB;
     99     uint64_t BlockEnd = *llvm::next(spi);
    100     // Add instructions to the BB.
    101     for (; ii != ie; ++ii) {
    102       if (Instructions[ii].Address < *spi ||
    103           Instructions[ii].Address >= BlockEnd)
    104         break;
    105       BB.addInst(Instructions[ii]);
    106     }
    107     f.addBlock(*spi, BB);
    108   }
    109 
    110   std::sort(f.Blocks.begin(), f.Blocks.end());
    111 
    112   // Calculate successors of each block.
    113   for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
    114     MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
    115     if (BB.getInsts().empty()) continue;
    116     const MCDecodedInst &Inst = BB.getInsts().back();
    117 
    118     if (Ana->isBranch(Inst.Inst)) {
    119       uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
    120       if (targ == -1ULL) {
    121         // Indirect branch. Bail and add all blocks of the function as a
    122         // successor.
    123         for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
    124           BB.addSucc(i->first);
    125       } else if (targ != Inst.Address+Inst.Size)
    126         BB.addSucc(targ);
    127       // Conditional branches can also fall through to the next block.
    128       if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
    129         BB.addSucc(llvm::next(i)->first);
    130     } else {
    131       // No branch. Fall through to the next block.
    132       if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
    133         BB.addSucc(llvm::next(i)->first);
    134     }
    135   }
    136 
    137   return f;
    138 }
    139