1 //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Enhanced Disassembly library's disassembler class. 11 // The disassembler is responsible for vending individual instructions according 12 // to a given architecture and disassembly syntax. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "EDDisassembler.h" 17 #include "EDInst.h" 18 #include "llvm/MC/EDInstInfo.h" 19 #include "llvm/MC/MCAsmInfo.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCDisassembler.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstPrinter.h" 25 #include "llvm/MC/MCRegisterInfo.h" 26 #include "llvm/MC/MCStreamer.h" 27 #include "llvm/MC/MCSubtargetInfo.h" 28 #include "llvm/MC/MCParser/AsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/Support/MemoryBuffer.h" 32 #include "llvm/Support/MemoryObject.h" 33 #include "llvm/Support/SourceMgr.h" 34 #include "llvm/Target/TargetAsmLexer.h" 35 #include "llvm/Target/TargetAsmParser.h" 36 #include "llvm/Target/TargetRegistry.h" 37 #include "llvm/Target/TargetMachine.h" 38 #include "llvm/Target/TargetRegisterInfo.h" 39 #include "llvm/Target/TargetSelect.h" 40 using namespace llvm; 41 42 bool EDDisassembler::sInitialized = false; 43 EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; 44 45 struct TripleMap { 46 Triple::ArchType Arch; 47 const char *String; 48 }; 49 50 static struct TripleMap triplemap[] = { 51 { Triple::x86, "i386-unknown-unknown" }, 52 { Triple::x86_64, "x86_64-unknown-unknown" }, 53 { Triple::arm, "arm-unknown-unknown" }, 54 { Triple::thumb, "thumb-unknown-unknown" }, 55 { Triple::InvalidArch, NULL, } 56 }; 57 58 /// infoFromArch - Returns the TripleMap corresponding to a given architecture, 59 /// or NULL if there is an error 60 /// 61 /// @arg arch - The Triple::ArchType for the desired architecture 62 static const char *tripleFromArch(Triple::ArchType arch) { 63 unsigned int infoIndex; 64 65 for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) { 66 if (arch == triplemap[infoIndex].Arch) 67 return triplemap[infoIndex].String; 68 } 69 70 return NULL; 71 } 72 73 /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer 74 /// for the desired assembly syntax, suitable for passing to 75 /// Target::createMCInstPrinter() 76 /// 77 /// @arg arch - The target architecture 78 /// @arg syntax - The assembly syntax in sd form 79 static int getLLVMSyntaxVariant(Triple::ArchType arch, 80 EDDisassembler::AssemblySyntax syntax) { 81 switch (syntax) { 82 default: 83 return -1; 84 // Mappings below from X86AsmPrinter.cpp 85 case EDDisassembler::kEDAssemblySyntaxX86ATT: 86 if (arch == Triple::x86 || arch == Triple::x86_64) 87 return 0; 88 else 89 return -1; 90 case EDDisassembler::kEDAssemblySyntaxX86Intel: 91 if (arch == Triple::x86 || arch == Triple::x86_64) 92 return 1; 93 else 94 return -1; 95 case EDDisassembler::kEDAssemblySyntaxARMUAL: 96 if (arch == Triple::arm || arch == Triple::thumb) 97 return 0; 98 else 99 return -1; 100 } 101 } 102 103 void EDDisassembler::initialize() { 104 if (sInitialized) 105 return; 106 107 sInitialized = true; 108 109 InitializeAllTargetInfos(); 110 InitializeAllTargets(); 111 InitializeAllMCCodeGenInfos(); 112 InitializeAllMCAsmInfos(); 113 InitializeAllMCRegisterInfos(); 114 InitializeAllMCSubtargetInfos(); 115 InitializeAllAsmPrinters(); 116 InitializeAllAsmParsers(); 117 InitializeAllDisassemblers(); 118 } 119 120 #undef BRINGUP_TARGET 121 122 EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, 123 AssemblySyntax syntax) { 124 CPUKey key; 125 key.Arch = arch; 126 key.Syntax = syntax; 127 128 EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); 129 130 if (i != sDisassemblers.end()) { 131 return i->second; 132 } else { 133 EDDisassembler* sdd = new EDDisassembler(key); 134 if (!sdd->valid()) { 135 delete sdd; 136 return NULL; 137 } 138 139 sDisassemblers[key] = sdd; 140 141 return sdd; 142 } 143 144 return NULL; 145 } 146 147 EDDisassembler *EDDisassembler::getDisassembler(StringRef str, 148 AssemblySyntax syntax) { 149 return getDisassembler(Triple(str).getArch(), syntax); 150 } 151 152 EDDisassembler::EDDisassembler(CPUKey &key) : 153 Valid(false), 154 HasSemantics(false), 155 ErrorStream(nulls()), 156 Key(key) { 157 const char *triple = tripleFromArch(key.Arch); 158 159 if (!triple) 160 return; 161 162 LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); 163 164 if (LLVMSyntaxVariant < 0) 165 return; 166 167 std::string tripleString(triple); 168 std::string errorString; 169 170 Tgt = TargetRegistry::lookupTarget(tripleString, 171 errorString); 172 173 if (!Tgt) 174 return; 175 176 std::string CPU; 177 std::string featureString; 178 TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU, 179 featureString)); 180 181 const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); 182 183 if (!registerInfo) 184 return; 185 186 initMaps(*registerInfo); 187 188 AsmInfo.reset(Tgt->createMCAsmInfo(tripleString)); 189 190 if (!AsmInfo) 191 return; 192 193 MRI.reset(Tgt->createMCRegInfo(tripleString)); 194 195 if (!MRI) 196 return; 197 198 Disassembler.reset(Tgt->createMCDisassembler()); 199 200 if (!Disassembler) 201 return; 202 203 InstInfos = Disassembler->getEDInfo(); 204 205 InstString.reset(new std::string); 206 InstStream.reset(new raw_string_ostream(*InstString)); 207 InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); 208 209 if (!InstPrinter) 210 return; 211 212 GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); 213 SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); 214 SpecificAsmLexer->InstallLexer(*GenericAsmLexer); 215 216 initMaps(*TargetMachine->getRegisterInfo()); 217 218 Valid = true; 219 } 220 221 EDDisassembler::~EDDisassembler() { 222 if (!valid()) 223 return; 224 } 225 226 namespace { 227 /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback 228 /// as provided by the sd interface. See MemoryObject. 229 class EDMemoryObject : public llvm::MemoryObject { 230 private: 231 EDByteReaderCallback Callback; 232 void *Arg; 233 public: 234 EDMemoryObject(EDByteReaderCallback callback, 235 void *arg) : Callback(callback), Arg(arg) { } 236 ~EDMemoryObject() { } 237 uint64_t getBase() const { return 0x0; } 238 uint64_t getExtent() const { return (uint64_t)-1; } 239 int readByte(uint64_t address, uint8_t *ptr) const { 240 if (!Callback) 241 return -1; 242 243 if (Callback(ptr, address, Arg)) 244 return -1; 245 246 return 0; 247 } 248 }; 249 } 250 251 EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, 252 uint64_t address, 253 void *arg) { 254 EDMemoryObject memoryObject(byteReader, arg); 255 256 MCInst* inst = new MCInst; 257 uint64_t byteSize; 258 259 if (!Disassembler->getInstruction(*inst, 260 byteSize, 261 memoryObject, 262 address, 263 ErrorStream)) { 264 delete inst; 265 return NULL; 266 } else { 267 const llvm::EDInstInfo *thisInstInfo = NULL; 268 269 if (InstInfos) { 270 thisInstInfo = &InstInfos[inst->getOpcode()]; 271 } 272 273 EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); 274 return sdInst; 275 } 276 } 277 278 void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { 279 unsigned numRegisters = registerInfo.getNumRegs(); 280 unsigned registerIndex; 281 282 for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { 283 const char* registerName = registerInfo.get(registerIndex).Name; 284 285 RegVec.push_back(registerName); 286 RegRMap[registerName] = registerIndex; 287 } 288 289 switch (Key.Arch) { 290 default: 291 break; 292 case Triple::x86: 293 case Triple::x86_64: 294 stackPointers.insert(registerIDWithName("SP")); 295 stackPointers.insert(registerIDWithName("ESP")); 296 stackPointers.insert(registerIDWithName("RSP")); 297 298 programCounters.insert(registerIDWithName("IP")); 299 programCounters.insert(registerIDWithName("EIP")); 300 programCounters.insert(registerIDWithName("RIP")); 301 break; 302 case Triple::arm: 303 case Triple::thumb: 304 stackPointers.insert(registerIDWithName("SP")); 305 306 programCounters.insert(registerIDWithName("PC")); 307 break; 308 } 309 } 310 311 const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { 312 if (registerID >= RegVec.size()) 313 return NULL; 314 else 315 return RegVec[registerID].c_str(); 316 } 317 318 unsigned EDDisassembler::registerIDWithName(const char *name) const { 319 regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); 320 if (iter == RegRMap.end()) 321 return 0; 322 else 323 return (*iter).second; 324 } 325 326 bool EDDisassembler::registerIsStackPointer(unsigned registerID) { 327 return (stackPointers.find(registerID) != stackPointers.end()); 328 } 329 330 bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { 331 return (programCounters.find(registerID) != programCounters.end()); 332 } 333 334 int EDDisassembler::printInst(std::string &str, MCInst &inst) { 335 PrinterMutex.acquire(); 336 337 InstPrinter->printInst(&inst, *InstStream); 338 InstStream->flush(); 339 str = *InstString; 340 InstString->clear(); 341 342 PrinterMutex.release(); 343 344 return 0; 345 } 346 347 static void diag_handler(const SMDiagnostic &diag, 348 void *context) 349 { 350 if (context) { 351 EDDisassembler *disassembler = static_cast<EDDisassembler*>(context); 352 diag.Print("", disassembler->ErrorStream); 353 } 354 } 355 356 int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, 357 SmallVectorImpl<AsmToken> &tokens, 358 const std::string &str) { 359 int ret = 0; 360 361 switch (Key.Arch) { 362 default: 363 return -1; 364 case Triple::x86: 365 case Triple::x86_64: 366 case Triple::arm: 367 case Triple::thumb: 368 break; 369 } 370 371 const char *cStr = str.c_str(); 372 MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); 373 374 StringRef instName; 375 SMLoc instLoc; 376 377 SourceMgr sourceMgr; 378 sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this)); 379 sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over 380 MCContext context(*AsmInfo, *MRI, NULL, NULL); 381 OwningPtr<MCStreamer> streamer(createNullStreamer(context)); 382 OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr, 383 context, *streamer, 384 *AsmInfo)); 385 386 StringRef triple = tripleFromArch(Key.Arch); 387 OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", "")); 388 OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI, 389 *genericParser)); 390 391 AsmToken OpcodeToken = genericParser->Lex(); 392 AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to 393 394 if (OpcodeToken.is(AsmToken::Identifier)) { 395 instName = OpcodeToken.getString(); 396 instLoc = OpcodeToken.getLoc(); 397 398 if (NextToken.isNot(AsmToken::Eof) && 399 TargetParser->ParseInstruction(instName, instLoc, operands)) 400 ret = -1; 401 } else { 402 ret = -1; 403 } 404 405 ParserMutex.acquire(); 406 407 if (!ret) { 408 GenericAsmLexer->setBuffer(buf); 409 410 while (SpecificAsmLexer->Lex(), 411 SpecificAsmLexer->isNot(AsmToken::Eof) && 412 SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { 413 if (SpecificAsmLexer->is(AsmToken::Error)) { 414 ret = -1; 415 break; 416 } 417 tokens.push_back(SpecificAsmLexer->getTok()); 418 } 419 } 420 421 ParserMutex.release(); 422 423 return ret; 424 } 425 426 int EDDisassembler::llvmSyntaxVariant() const { 427 return LLVMSyntaxVariant; 428 } 429