1 //===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/MC/MCObjectDisassembler.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/ADT/SetVector.h" 13 #include "llvm/ADT/StringExtras.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/ADT/Twine.h" 16 #include "llvm/MC/MCAtom.h" 17 #include "llvm/MC/MCDisassembler.h" 18 #include "llvm/MC/MCFunction.h" 19 #include "llvm/MC/MCInstrAnalysis.h" 20 #include "llvm/MC/MCModule.h" 21 #include "llvm/Object/ObjectFile.h" 22 #include "llvm/Support/MemoryObject.h" 23 #include "llvm/Support/StringRefMemoryObject.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <map> 26 #include <set> 27 28 using namespace llvm; 29 using namespace object; 30 31 MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, 32 const MCDisassembler &Dis, 33 const MCInstrAnalysis &MIA) 34 : Obj(Obj), Dis(Dis), MIA(MIA) {} 35 36 MCModule *MCObjectDisassembler::buildModule(bool withCFG) { 37 MCModule *Module = new MCModule; 38 buildSectionAtoms(Module); 39 if (withCFG) 40 buildCFG(Module); 41 return Module; 42 } 43 44 void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { 45 error_code ec; 46 for (section_iterator SI = Obj.begin_sections(), 47 SE = Obj.end_sections(); 48 SI != SE; 49 SI.increment(ec)) { 50 if (ec) break; 51 52 bool isText; SI->isText(isText); 53 bool isData; SI->isData(isData); 54 if (!isData && !isText) 55 continue; 56 57 uint64_t StartAddr; SI->getAddress(StartAddr); 58 uint64_t SecSize; SI->getSize(SecSize); 59 if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize) 60 continue; 61 62 StringRef Contents; SI->getContents(Contents); 63 StringRefMemoryObject memoryObject(Contents); 64 65 // We don't care about things like non-file-backed sections yet. 66 if (Contents.size() != SecSize || !SecSize) 67 continue; 68 uint64_t EndAddr = StartAddr + SecSize - 1; 69 70 StringRef SecName; SI->getName(SecName); 71 72 if (isText) { 73 MCTextAtom *Text = Module->createTextAtom(StartAddr, EndAddr); 74 Text->setName(SecName); 75 uint64_t InstSize; 76 for (uint64_t Index = 0; Index < SecSize; Index += InstSize) { 77 MCInst Inst; 78 if (Dis.getInstruction(Inst, InstSize, memoryObject, Index, 79 nulls(), nulls())) 80 Text->addInst(Inst, InstSize); 81 else 82 // We don't care about splitting mixed atoms either. 83 llvm_unreachable("Couldn't disassemble instruction in atom."); 84 } 85 86 } else { 87 MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr); 88 Data->setName(SecName); 89 for (uint64_t Index = 0; Index < SecSize; ++Index) 90 Data->addData(Contents[Index]); 91 } 92 } 93 } 94 95 namespace { 96 struct BBInfo; 97 typedef std::set<BBInfo*> BBInfoSetTy; 98 99 struct BBInfo { 100 MCTextAtom *Atom; 101 MCBasicBlock *BB; 102 BBInfoSetTy Succs; 103 BBInfoSetTy Preds; 104 105 void addSucc(BBInfo &Succ) { 106 Succs.insert(&Succ); 107 Succ.Preds.insert(this); 108 } 109 }; 110 } 111 112 void MCObjectDisassembler::buildCFG(MCModule *Module) { 113 typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; 114 BBInfoByAddrTy BBInfos; 115 typedef std::set<uint64_t> AddressSetTy; 116 AddressSetTy Splits; 117 AddressSetTy Calls; 118 119 assert(Module->func_begin() == Module->func_end() 120 && "Module already has a CFG!"); 121 122 // First, determine the basic block boundaries and call targets. 123 for (MCModule::atom_iterator AI = Module->atom_begin(), 124 AE = Module->atom_end(); 125 AI != AE; ++AI) { 126 MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); 127 if (!TA) continue; 128 Calls.insert(TA->getBeginAddr()); 129 BBInfos[TA->getBeginAddr()].Atom = TA; 130 for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); 131 II != IE; ++II) { 132 if (MIA.isTerminator(II->Inst)) 133 Splits.insert(II->Address + II->Size); 134 uint64_t Target; 135 if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { 136 if (MIA.isCall(II->Inst)) 137 Calls.insert(Target); 138 Splits.insert(Target); 139 } 140 } 141 } 142 143 // Split text atoms into basic block atoms. 144 for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); 145 SI != SE; ++SI) { 146 MCAtom *A = Module->findAtomContaining(*SI); 147 if (!A) continue; 148 MCTextAtom *TA = cast<MCTextAtom>(A); 149 if (TA->getBeginAddr() == *SI) 150 continue; 151 MCTextAtom *NewAtom = TA->split(*SI); 152 BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; 153 StringRef BBName = TA->getName(); 154 BBName = BBName.substr(0, BBName.find_last_of(':')); 155 NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); 156 } 157 158 // Compute succs/preds. 159 for (MCModule::atom_iterator AI = Module->atom_begin(), 160 AE = Module->atom_end(); 161 AI != AE; ++AI) { 162 MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); 163 if (!TA) continue; 164 BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; 165 const MCDecodedInst &LI = TA->back(); 166 if (MIA.isBranch(LI.Inst)) { 167 uint64_t Target; 168 if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) 169 CurBB.addSucc(BBInfos[Target]); 170 if (MIA.isConditionalBranch(LI.Inst)) 171 CurBB.addSucc(BBInfos[LI.Address + LI.Size]); 172 } else if (!MIA.isTerminator(LI.Inst)) 173 CurBB.addSucc(BBInfos[LI.Address + LI.Size]); 174 } 175 176 177 // Create functions and basic blocks. 178 for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); 179 CI != CE; ++CI) { 180 BBInfo &BBI = BBInfos[*CI]; 181 if (!BBI.Atom) continue; 182 183 MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); 184 185 // Create MCBBs. 186 SmallSetVector<BBInfo*, 16> Worklist; 187 Worklist.insert(&BBI); 188 for (size_t WI = 0; WI < Worklist.size(); ++WI) { 189 BBInfo *BBI = Worklist[WI]; 190 if (!BBI->Atom) 191 continue; 192 BBI->BB = &MCFN.createBlock(*BBI->Atom); 193 // Add all predecessors and successors to the worklist. 194 for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); 195 SI != SE; ++SI) 196 Worklist.insert(*SI); 197 for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); 198 PI != PE; ++PI) 199 Worklist.insert(*PI); 200 } 201 202 // Set preds/succs. 203 for (size_t WI = 0; WI < Worklist.size(); ++WI) { 204 BBInfo *BBI = Worklist[WI]; 205 MCBasicBlock *MCBB = BBI->BB; 206 if (!MCBB) 207 continue; 208 for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); 209 SI != SE; ++SI) 210 MCBB->addSuccessor((*SI)->BB); 211 for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); 212 PI != PE; ++PI) 213 MCBB->addPredecessor((*PI)->BB); 214 } 215 } 216 } 217