1 //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Enhanced Disassembly library's disassembler class. 11 // The disassembler is responsible for vending individual instructions according 12 // to a given architecture and disassembly syntax. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "EDDisassembler.h" 17 #include "EDInst.h" 18 #include "llvm/MC/EDInstInfo.h" 19 #include "llvm/MC/MCAsmInfo.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCDisassembler.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstPrinter.h" 25 #include "llvm/MC/MCRegisterInfo.h" 26 #include "llvm/MC/MCStreamer.h" 27 #include "llvm/MC/MCSubtargetInfo.h" 28 #include "llvm/MC/MCParser/AsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCTargetAsmLexer.h" 32 #include "llvm/MC/MCTargetAsmParser.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/MemoryObject.h" 35 #include "llvm/Support/SourceMgr.h" 36 #include "llvm/Support/TargetRegistry.h" 37 #include "llvm/Support/TargetSelect.h" 38 using namespace llvm; 39 40 bool EDDisassembler::sInitialized = false; 41 EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; 42 43 struct TripleMap { 44 Triple::ArchType Arch; 45 const char *String; 46 }; 47 48 static struct TripleMap triplemap[] = { 49 { Triple::x86, "i386-unknown-unknown" }, 50 { Triple::x86_64, "x86_64-unknown-unknown" }, 51 { Triple::arm, "arm-unknown-unknown" }, 52 { Triple::thumb, "thumb-unknown-unknown" }, 53 { Triple::InvalidArch, NULL, } 54 }; 55 56 /// infoFromArch - Returns the TripleMap corresponding to a given architecture, 57 /// or NULL if there is an error 58 /// 59 /// @arg arch - The Triple::ArchType for the desired architecture 60 static const char *tripleFromArch(Triple::ArchType arch) { 61 unsigned int infoIndex; 62 63 for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) { 64 if (arch == triplemap[infoIndex].Arch) 65 return triplemap[infoIndex].String; 66 } 67 68 return NULL; 69 } 70 71 /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer 72 /// for the desired assembly syntax, suitable for passing to 73 /// Target::createMCInstPrinter() 74 /// 75 /// @arg arch - The target architecture 76 /// @arg syntax - The assembly syntax in sd form 77 static int getLLVMSyntaxVariant(Triple::ArchType arch, 78 EDDisassembler::AssemblySyntax syntax) { 79 switch (syntax) { 80 default: 81 return -1; 82 // Mappings below from X86AsmPrinter.cpp 83 case EDDisassembler::kEDAssemblySyntaxX86ATT: 84 if (arch == Triple::x86 || arch == Triple::x86_64) 85 return 0; 86 else 87 return -1; 88 case EDDisassembler::kEDAssemblySyntaxX86Intel: 89 if (arch == Triple::x86 || arch == Triple::x86_64) 90 return 1; 91 else 92 return -1; 93 case EDDisassembler::kEDAssemblySyntaxARMUAL: 94 if (arch == Triple::arm || arch == Triple::thumb) 95 return 0; 96 else 97 return -1; 98 } 99 } 100 101 void EDDisassembler::initialize() { 102 if (sInitialized) 103 return; 104 105 sInitialized = true; 106 107 InitializeAllTargetInfos(); 108 InitializeAllTargetMCs(); 109 InitializeAllAsmParsers(); 110 InitializeAllDisassemblers(); 111 } 112 113 #undef BRINGUP_TARGET 114 115 EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, 116 AssemblySyntax syntax) { 117 CPUKey key; 118 key.Arch = arch; 119 key.Syntax = syntax; 120 121 EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); 122 123 if (i != sDisassemblers.end()) { 124 return i->second; 125 } else { 126 EDDisassembler* sdd = new EDDisassembler(key); 127 if (!sdd->valid()) { 128 delete sdd; 129 return NULL; 130 } 131 132 sDisassemblers[key] = sdd; 133 134 return sdd; 135 } 136 137 return NULL; 138 } 139 140 EDDisassembler *EDDisassembler::getDisassembler(StringRef str, 141 AssemblySyntax syntax) { 142 return getDisassembler(Triple(str).getArch(), syntax); 143 } 144 145 EDDisassembler::EDDisassembler(CPUKey &key) : 146 Valid(false), 147 HasSemantics(false), 148 ErrorStream(nulls()), 149 Key(key) { 150 const char *triple = tripleFromArch(key.Arch); 151 152 if (!triple) 153 return; 154 155 LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); 156 157 if (LLVMSyntaxVariant < 0) 158 return; 159 160 std::string tripleString(triple); 161 std::string errorString; 162 163 Tgt = TargetRegistry::lookupTarget(tripleString, 164 errorString); 165 166 if (!Tgt) 167 return; 168 169 MRI.reset(Tgt->createMCRegInfo(tripleString)); 170 171 if (!MRI) 172 return; 173 174 initMaps(*MRI); 175 176 AsmInfo.reset(Tgt->createMCAsmInfo(tripleString)); 177 178 if (!AsmInfo) 179 return; 180 181 STI.reset(Tgt->createMCSubtargetInfo(tripleString, "", "")); 182 183 if (!STI) 184 return; 185 186 Disassembler.reset(Tgt->createMCDisassembler(*STI)); 187 188 if (!Disassembler) 189 return; 190 191 InstInfos = Disassembler->getEDInfo(); 192 193 InstString.reset(new std::string); 194 InstStream.reset(new raw_string_ostream(*InstString)); 195 InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, *STI)); 196 197 if (!InstPrinter) 198 return; 199 200 GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); 201 SpecificAsmLexer.reset(Tgt->createMCAsmLexer(*MRI, *AsmInfo)); 202 SpecificAsmLexer->InstallLexer(*GenericAsmLexer); 203 204 initMaps(*MRI); 205 206 Valid = true; 207 } 208 209 EDDisassembler::~EDDisassembler() { 210 if (!valid()) 211 return; 212 } 213 214 namespace { 215 /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback 216 /// as provided by the sd interface. See MemoryObject. 217 class EDMemoryObject : public llvm::MemoryObject { 218 private: 219 EDByteReaderCallback Callback; 220 void *Arg; 221 public: 222 EDMemoryObject(EDByteReaderCallback callback, 223 void *arg) : Callback(callback), Arg(arg) { } 224 ~EDMemoryObject() { } 225 uint64_t getBase() const { return 0x0; } 226 uint64_t getExtent() const { return (uint64_t)-1; } 227 int readByte(uint64_t address, uint8_t *ptr) const { 228 if (!Callback) 229 return -1; 230 231 if (Callback(ptr, address, Arg)) 232 return -1; 233 234 return 0; 235 } 236 }; 237 } 238 239 EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, 240 uint64_t address, 241 void *arg) { 242 EDMemoryObject memoryObject(byteReader, arg); 243 244 MCInst* inst = new MCInst; 245 uint64_t byteSize; 246 247 MCDisassembler::DecodeStatus S; 248 S = Disassembler->getInstruction(*inst, byteSize, memoryObject, address, 249 ErrorStream, nulls()); 250 switch (S) { 251 case MCDisassembler::Fail: 252 case MCDisassembler::SoftFail: 253 // FIXME: Do something different on soft failure mode? 254 delete inst; 255 return NULL; 256 257 case MCDisassembler::Success: { 258 const llvm::EDInstInfo *thisInstInfo = NULL; 259 260 if (InstInfos) { 261 thisInstInfo = &InstInfos[inst->getOpcode()]; 262 } 263 264 EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); 265 return sdInst; 266 } 267 } 268 return NULL; 269 } 270 271 void EDDisassembler::initMaps(const MCRegisterInfo ®isterInfo) { 272 unsigned numRegisters = registerInfo.getNumRegs(); 273 unsigned registerIndex; 274 275 for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { 276 const char* registerName = registerInfo.get(registerIndex).Name; 277 278 RegVec.push_back(registerName); 279 RegRMap[registerName] = registerIndex; 280 } 281 282 switch (Key.Arch) { 283 default: 284 break; 285 case Triple::x86: 286 case Triple::x86_64: 287 stackPointers.insert(registerIDWithName("SP")); 288 stackPointers.insert(registerIDWithName("ESP")); 289 stackPointers.insert(registerIDWithName("RSP")); 290 291 programCounters.insert(registerIDWithName("IP")); 292 programCounters.insert(registerIDWithName("EIP")); 293 programCounters.insert(registerIDWithName("RIP")); 294 break; 295 case Triple::arm: 296 case Triple::thumb: 297 stackPointers.insert(registerIDWithName("SP")); 298 299 programCounters.insert(registerIDWithName("PC")); 300 break; 301 } 302 } 303 304 const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { 305 if (registerID >= RegVec.size()) 306 return NULL; 307 else 308 return RegVec[registerID].c_str(); 309 } 310 311 unsigned EDDisassembler::registerIDWithName(const char *name) const { 312 regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); 313 if (iter == RegRMap.end()) 314 return 0; 315 else 316 return (*iter).second; 317 } 318 319 bool EDDisassembler::registerIsStackPointer(unsigned registerID) { 320 return (stackPointers.find(registerID) != stackPointers.end()); 321 } 322 323 bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { 324 return (programCounters.find(registerID) != programCounters.end()); 325 } 326 327 int EDDisassembler::printInst(std::string &str, MCInst &inst) { 328 PrinterMutex.acquire(); 329 330 InstPrinter->printInst(&inst, *InstStream, ""); 331 InstStream->flush(); 332 str = *InstString; 333 InstString->clear(); 334 335 PrinterMutex.release(); 336 337 return 0; 338 } 339 340 static void diag_handler(const SMDiagnostic &diag, 341 void *context) 342 { 343 if (context) { 344 EDDisassembler *disassembler = static_cast<EDDisassembler*>(context); 345 diag.Print("", disassembler->ErrorStream); 346 } 347 } 348 349 int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, 350 SmallVectorImpl<AsmToken> &tokens, 351 const std::string &str) { 352 int ret = 0; 353 354 switch (Key.Arch) { 355 default: 356 return -1; 357 case Triple::x86: 358 case Triple::x86_64: 359 case Triple::arm: 360 case Triple::thumb: 361 break; 362 } 363 364 const char *cStr = str.c_str(); 365 MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); 366 367 StringRef instName; 368 SMLoc instLoc; 369 370 SourceMgr sourceMgr; 371 sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this)); 372 sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over 373 MCContext context(*AsmInfo, *MRI, NULL); 374 OwningPtr<MCStreamer> streamer(createNullStreamer(context)); 375 OwningPtr<MCAsmParser> genericParser(createMCAsmParser(sourceMgr, 376 context, *streamer, 377 *AsmInfo)); 378 379 StringRef triple = tripleFromArch(Key.Arch); 380 OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", "")); 381 OwningPtr<MCTargetAsmParser> 382 TargetParser(Tgt->createMCAsmParser(*STI, *genericParser)); 383 384 AsmToken OpcodeToken = genericParser->Lex(); 385 AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to 386 387 if (OpcodeToken.is(AsmToken::Identifier)) { 388 instName = OpcodeToken.getString(); 389 instLoc = OpcodeToken.getLoc(); 390 391 if (NextToken.isNot(AsmToken::Eof) && 392 TargetParser->ParseInstruction(instName, instLoc, operands)) 393 ret = -1; 394 } else { 395 ret = -1; 396 } 397 398 ParserMutex.acquire(); 399 400 if (!ret) { 401 GenericAsmLexer->setBuffer(buf); 402 403 while (SpecificAsmLexer->Lex(), 404 SpecificAsmLexer->isNot(AsmToken::Eof) && 405 SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { 406 if (SpecificAsmLexer->is(AsmToken::Error)) { 407 ret = -1; 408 break; 409 } 410 tokens.push_back(SpecificAsmLexer->getTok()); 411 } 412 } 413 414 ParserMutex.release(); 415 416 return ret; 417 } 418 419 int EDDisassembler::llvmSyntaxVariant() const { 420 return LLVMSyntaxVariant; 421 } 422