Home | History | Annotate | Download | only in MCDisassembler
      1 //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the Enhanced Disassembly library's disassembler class.
     11 // The disassembler is responsible for vending individual instructions according
     12 // to a given architecture and disassembly syntax.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "EDDisassembler.h"
     17 #include "EDInst.h"
     18 #include "llvm/MC/EDInstInfo.h"
     19 #include "llvm/MC/MCAsmInfo.h"
     20 #include "llvm/MC/MCContext.h"
     21 #include "llvm/MC/MCDisassembler.h"
     22 #include "llvm/MC/MCExpr.h"
     23 #include "llvm/MC/MCInst.h"
     24 #include "llvm/MC/MCInstPrinter.h"
     25 #include "llvm/MC/MCInstrInfo.h"
     26 #include "llvm/MC/MCRegisterInfo.h"
     27 #include "llvm/MC/MCStreamer.h"
     28 #include "llvm/MC/MCSubtargetInfo.h"
     29 #include "llvm/MC/MCParser/AsmLexer.h"
     30 #include "llvm/MC/MCParser/MCAsmParser.h"
     31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
     32 #include "llvm/MC/MCTargetAsmLexer.h"
     33 #include "llvm/MC/MCTargetAsmParser.h"
     34 #include "llvm/Support/MemoryBuffer.h"
     35 #include "llvm/Support/MemoryObject.h"
     36 #include "llvm/Support/SourceMgr.h"
     37 #include "llvm/Support/TargetRegistry.h"
     38 using namespace llvm;
     39 
     40 EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
     41 
     42 struct TripleMap {
     43   Triple::ArchType Arch;
     44   const char *String;
     45 };
     46 
     47 static struct TripleMap triplemap[] = {
     48   { Triple::x86,          "i386-unknown-unknown"    },
     49   { Triple::x86_64,       "x86_64-unknown-unknown"  },
     50   { Triple::arm,          "arm-unknown-unknown"     },
     51   { Triple::thumb,        "thumb-unknown-unknown"   }
     52 };
     53 
     54 /// infoFromArch - Returns the TripleMap corresponding to a given architecture,
     55 ///   or NULL if there is an error
     56 ///
     57 /// @arg arch - The Triple::ArchType for the desired architecture
     58 static const char *tripleFromArch(Triple::ArchType arch) {
     59   unsigned int infoIndex;
     60 
     61   for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
     62     if (arch == triplemap[infoIndex].Arch)
     63       return triplemap[infoIndex].String;
     64   }
     65 
     66   return NULL;
     67 }
     68 
     69 /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
     70 ///   for the desired assembly syntax, suitable for passing to
     71 ///   Target::createMCInstPrinter()
     72 ///
     73 /// @arg arch   - The target architecture
     74 /// @arg syntax - The assembly syntax in sd form
     75 static int getLLVMSyntaxVariant(Triple::ArchType arch,
     76                                 EDDisassembler::AssemblySyntax syntax) {
     77   switch (syntax) {
     78   // Mappings below from X86AsmPrinter.cpp
     79   case EDDisassembler::kEDAssemblySyntaxX86ATT:
     80     if (arch == Triple::x86 || arch == Triple::x86_64)
     81       return 0;
     82     break;
     83   case EDDisassembler::kEDAssemblySyntaxX86Intel:
     84     if (arch == Triple::x86 || arch == Triple::x86_64)
     85       return 1;
     86     break;
     87   case EDDisassembler::kEDAssemblySyntaxARMUAL:
     88     if (arch == Triple::arm || arch == Triple::thumb)
     89       return 0;
     90     break;
     91   }
     92 
     93   return -1;
     94 }
     95 
     96 EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
     97                                                 AssemblySyntax syntax) {
     98   const char *triple = tripleFromArch(arch);
     99   return getDisassembler(StringRef(triple), syntax);
    100 }
    101 
    102 EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
    103                                                 AssemblySyntax syntax) {
    104   CPUKey key;
    105   key.Triple = str.str();
    106   key.Syntax = syntax;
    107 
    108   EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
    109 
    110   if (i != sDisassemblers.end()) {
    111     return i->second;
    112   }
    113 
    114   EDDisassembler *sdd = new EDDisassembler(key);
    115   if (!sdd->valid()) {
    116     delete sdd;
    117     return NULL;
    118   }
    119 
    120   sDisassemblers[key] = sdd;
    121 
    122   return sdd;
    123 }
    124 
    125 EDDisassembler::EDDisassembler(CPUKey &key) :
    126   Valid(false),
    127   HasSemantics(false),
    128   ErrorStream(nulls()),
    129   Key(key),
    130   TgtTriple(key.Triple.c_str()) {
    131 
    132   LLVMSyntaxVariant = getLLVMSyntaxVariant(TgtTriple.getArch(), key.Syntax);
    133 
    134   if (LLVMSyntaxVariant < 0)
    135     return;
    136 
    137   std::string tripleString(key.Triple);
    138   std::string errorString;
    139 
    140   Tgt = TargetRegistry::lookupTarget(key.Triple,
    141                                      errorString);
    142 
    143   if (!Tgt)
    144     return;
    145 
    146   MRI.reset(Tgt->createMCRegInfo(tripleString));
    147 
    148   if (!MRI)
    149     return;
    150 
    151   initMaps(*MRI);
    152 
    153   AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
    154 
    155   if (!AsmInfo)
    156     return;
    157 
    158   STI.reset(Tgt->createMCSubtargetInfo(tripleString, "", ""));
    159 
    160   if (!STI)
    161     return;
    162 
    163   Disassembler.reset(Tgt->createMCDisassembler(*STI));
    164 
    165   if (!Disassembler)
    166     return;
    167 
    168   InstInfos = Disassembler->getEDInfo();
    169 
    170   MII.reset(Tgt->createMCInstrInfo());
    171 
    172   if (!MII)
    173     return;
    174 
    175   InstString.reset(new std::string);
    176   InstStream.reset(new raw_string_ostream(*InstString));
    177   InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo,
    178                                              *MII, *MRI, *STI));
    179 
    180   if (!InstPrinter)
    181     return;
    182 
    183   GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
    184   SpecificAsmLexer.reset(Tgt->createMCAsmLexer(*MRI, *AsmInfo));
    185   SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
    186 
    187   initMaps(*MRI);
    188 
    189   Valid = true;
    190 }
    191 
    192 EDDisassembler::~EDDisassembler() {
    193   if (!valid())
    194     return;
    195 }
    196 
    197 namespace {
    198   /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
    199   ///   as provided by the sd interface.  See MemoryObject.
    200   class EDMemoryObject : public llvm::MemoryObject {
    201   private:
    202     EDByteReaderCallback Callback;
    203     void *Arg;
    204   public:
    205     EDMemoryObject(EDByteReaderCallback callback,
    206                    void *arg) : Callback(callback), Arg(arg) { }
    207     ~EDMemoryObject() { }
    208     uint64_t getBase() const { return 0x0; }
    209     uint64_t getExtent() const { return (uint64_t)-1; }
    210     int readByte(uint64_t address, uint8_t *ptr) const {
    211       if (!Callback)
    212         return -1;
    213 
    214       if (Callback(ptr, address, Arg))
    215         return -1;
    216 
    217       return 0;
    218     }
    219   };
    220 }
    221 
    222 EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
    223                                    uint64_t address,
    224                                    void *arg) {
    225   EDMemoryObject memoryObject(byteReader, arg);
    226 
    227   MCInst* inst = new MCInst;
    228   uint64_t byteSize;
    229 
    230   MCDisassembler::DecodeStatus S;
    231   S = Disassembler->getInstruction(*inst, byteSize, memoryObject, address,
    232                                    ErrorStream, nulls());
    233   switch (S) {
    234   case MCDisassembler::Fail:
    235   case MCDisassembler::SoftFail:
    236     // FIXME: Do something different on soft failure mode?
    237     delete inst;
    238     return NULL;
    239 
    240   case MCDisassembler::Success: {
    241     const llvm::EDInstInfo *thisInstInfo = NULL;
    242 
    243     if (InstInfos) {
    244       thisInstInfo = &InstInfos[inst->getOpcode()];
    245     }
    246 
    247     EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
    248     return sdInst;
    249   }
    250   }
    251   return NULL;
    252 }
    253 
    254 void EDDisassembler::initMaps(const MCRegisterInfo &registerInfo) {
    255   unsigned numRegisters = registerInfo.getNumRegs();
    256   unsigned registerIndex;
    257 
    258   for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
    259     const char* registerName = registerInfo.get(registerIndex).Name;
    260 
    261     RegVec.push_back(registerName);
    262     RegRMap[registerName] = registerIndex;
    263   }
    264 
    265   switch (TgtTriple.getArch()) {
    266   default:
    267     break;
    268   case Triple::x86:
    269   case Triple::x86_64:
    270     stackPointers.insert(registerIDWithName("SP"));
    271     stackPointers.insert(registerIDWithName("ESP"));
    272     stackPointers.insert(registerIDWithName("RSP"));
    273 
    274     programCounters.insert(registerIDWithName("IP"));
    275     programCounters.insert(registerIDWithName("EIP"));
    276     programCounters.insert(registerIDWithName("RIP"));
    277     break;
    278   case Triple::arm:
    279   case Triple::thumb:
    280     stackPointers.insert(registerIDWithName("SP"));
    281 
    282     programCounters.insert(registerIDWithName("PC"));
    283     break;
    284   }
    285 }
    286 
    287 const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
    288   if (registerID >= RegVec.size())
    289     return NULL;
    290   else
    291     return RegVec[registerID].c_str();
    292 }
    293 
    294 unsigned EDDisassembler::registerIDWithName(const char *name) const {
    295   regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
    296   if (iter == RegRMap.end())
    297     return 0;
    298   else
    299     return (*iter).second;
    300 }
    301 
    302 bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
    303   return (stackPointers.find(registerID) != stackPointers.end());
    304 }
    305 
    306 bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
    307   return (programCounters.find(registerID) != programCounters.end());
    308 }
    309 
    310 int EDDisassembler::printInst(std::string &str, MCInst &inst) {
    311   PrinterMutex.acquire();
    312 
    313   InstPrinter->printInst(&inst, *InstStream, "");
    314   InstStream->flush();
    315   str = *InstString;
    316   InstString->clear();
    317 
    318   PrinterMutex.release();
    319 
    320   return 0;
    321 }
    322 
    323 static void diag_handler(const SMDiagnostic &diag, void *context) {
    324   if (context)
    325     diag.print("", static_cast<EDDisassembler*>(context)->ErrorStream);
    326 }
    327 
    328 int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
    329                               SmallVectorImpl<AsmToken> &tokens,
    330                               const std::string &str) {
    331   int ret = 0;
    332 
    333   switch (TgtTriple.getArch()) {
    334   default:
    335     return -1;
    336   case Triple::x86:
    337   case Triple::x86_64:
    338   case Triple::arm:
    339   case Triple::thumb:
    340     break;
    341   }
    342 
    343   const char *cStr = str.c_str();
    344   MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
    345 
    346   StringRef instName;
    347   SMLoc instLoc;
    348 
    349   SourceMgr sourceMgr;
    350   sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
    351   sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
    352   MCContext context(*AsmInfo, *MRI, NULL);
    353   OwningPtr<MCStreamer> streamer(createNullStreamer(context));
    354   OwningPtr<MCAsmParser> genericParser(createMCAsmParser(sourceMgr,
    355                                                          context, *streamer,
    356                                                          *AsmInfo));
    357 
    358   OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(Key.Triple.c_str(), "", ""));
    359   OwningPtr<MCTargetAsmParser>
    360     TargetParser(Tgt->createMCAsmParser(*STI, *genericParser));
    361 
    362   AsmToken OpcodeToken = genericParser->Lex();
    363   AsmToken NextToken = genericParser->Lex();  // consume next token, because specificParser expects us to
    364 
    365   if (OpcodeToken.is(AsmToken::Identifier)) {
    366     instName = OpcodeToken.getString();
    367     instLoc = OpcodeToken.getLoc();
    368 
    369     if (NextToken.isNot(AsmToken::Eof) &&
    370         TargetParser->ParseInstruction(instName, instLoc, operands))
    371       ret = -1;
    372   } else {
    373     ret = -1;
    374   }
    375 
    376   ParserMutex.acquire();
    377 
    378   if (!ret) {
    379     GenericAsmLexer->setBuffer(buf);
    380 
    381     while (SpecificAsmLexer->Lex(),
    382            SpecificAsmLexer->isNot(AsmToken::Eof) &&
    383            SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
    384       if (SpecificAsmLexer->is(AsmToken::Error)) {
    385         ret = -1;
    386         break;
    387       }
    388       tokens.push_back(SpecificAsmLexer->getTok());
    389     }
    390   }
    391 
    392   ParserMutex.release();
    393 
    394   return ret;
    395 }
    396 
    397 int EDDisassembler::llvmSyntaxVariant() const {
    398   return LLVMSyntaxVariant;
    399 }
    400