Home | History | Annotate | Download | only in MCDisassembler
      1 //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the Enhanced Disassembly library's  disassembler class.
     11 // The disassembler is responsible for vending individual instructions according
     12 // to a given architecture and disassembly syntax.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "EDDisassembler.h"
     17 #include "EDInst.h"
     18 #include "llvm/MC/EDInstInfo.h"
     19 #include "llvm/MC/MCAsmInfo.h"
     20 #include "llvm/MC/MCContext.h"
     21 #include "llvm/MC/MCDisassembler.h"
     22 #include "llvm/MC/MCExpr.h"
     23 #include "llvm/MC/MCInst.h"
     24 #include "llvm/MC/MCInstPrinter.h"
     25 #include "llvm/MC/MCRegisterInfo.h"
     26 #include "llvm/MC/MCStreamer.h"
     27 #include "llvm/MC/MCSubtargetInfo.h"
     28 #include "llvm/MC/MCParser/AsmLexer.h"
     29 #include "llvm/MC/MCParser/MCAsmParser.h"
     30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
     31 #include "llvm/Support/MemoryBuffer.h"
     32 #include "llvm/Support/MemoryObject.h"
     33 #include "llvm/Support/SourceMgr.h"
     34 #include "llvm/Target/TargetAsmLexer.h"
     35 #include "llvm/Target/TargetAsmParser.h"
     36 #include "llvm/Target/TargetRegistry.h"
     37 #include "llvm/Target/TargetMachine.h"
     38 #include "llvm/Target/TargetRegisterInfo.h"
     39 #include "llvm/Target/TargetSelect.h"
     40 using namespace llvm;
     41 
/// sInitialized - Set by initialize() the first time it runs, so that later
///   calls return immediately.
bool EDDisassembler::sInitialized = false;
/// sDisassemblers - The disassemblers created so far.  getDisassembler() looks
///   entries up by CPUKey and stores each newly created valid instance here.
EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
     44 
     45 struct TripleMap {
     46   Triple::ArchType Arch;
     47   const char *String;
     48 };
     49 
     50 static struct TripleMap triplemap[] = {
     51   { Triple::x86,          "i386-unknown-unknown"    },
     52   { Triple::x86_64,       "x86_64-unknown-unknown"  },
     53   { Triple::arm,          "arm-unknown-unknown"     },
     54   { Triple::thumb,        "thumb-unknown-unknown"   },
     55   { Triple::InvalidArch,  NULL,                     }
     56 };
     57 
     58 /// infoFromArch - Returns the TripleMap corresponding to a given architecture,
     59 ///   or NULL if there is an error
     60 ///
     61 /// @arg arch - The Triple::ArchType for the desired architecture
     62 static const char *tripleFromArch(Triple::ArchType arch) {
     63   unsigned int infoIndex;
     64 
     65   for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
     66     if (arch == triplemap[infoIndex].Arch)
     67       return triplemap[infoIndex].String;
     68   }
     69 
     70   return NULL;
     71 }
     72 
     73 /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
     74 ///   for the desired assembly syntax, suitable for passing to
     75 ///   Target::createMCInstPrinter()
     76 ///
     77 /// @arg arch   - The target architecture
     78 /// @arg syntax - The assembly syntax in sd form
     79 static int getLLVMSyntaxVariant(Triple::ArchType arch,
     80                                 EDDisassembler::AssemblySyntax syntax) {
     81   switch (syntax) {
     82   default:
     83     return -1;
     84   // Mappings below from X86AsmPrinter.cpp
     85   case EDDisassembler::kEDAssemblySyntaxX86ATT:
     86     if (arch == Triple::x86 || arch == Triple::x86_64)
     87       return 0;
     88     else
     89       return -1;
     90   case EDDisassembler::kEDAssemblySyntaxX86Intel:
     91     if (arch == Triple::x86 || arch == Triple::x86_64)
     92       return 1;
     93     else
     94       return -1;
     95   case EDDisassembler::kEDAssemblySyntaxARMUAL:
     96     if (arch == Triple::arm || arch == Triple::thumb)
     97       return 0;
     98     else
     99       return -1;
    100   }
    101 }
    102 
    103 void EDDisassembler::initialize() {
    104   if (sInitialized)
    105     return;
    106 
    107   sInitialized = true;
    108 
    109   InitializeAllTargetInfos();
    110   InitializeAllTargets();
    111   InitializeAllMCCodeGenInfos();
    112   InitializeAllMCAsmInfos();
    113   InitializeAllMCRegisterInfos();
    114   InitializeAllMCSubtargetInfos();
    115   InitializeAllAsmPrinters();
    116   InitializeAllAsmParsers();
    117   InitializeAllDisassemblers();
    118 }
    119 
    120 #undef BRINGUP_TARGET
    121 
    122 EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
    123                                                 AssemblySyntax syntax) {
    124   CPUKey key;
    125   key.Arch = arch;
    126   key.Syntax = syntax;
    127 
    128   EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
    129 
    130   if (i != sDisassemblers.end()) {
    131     return i->second;
    132   } else {
    133     EDDisassembler* sdd = new EDDisassembler(key);
    134     if (!sdd->valid()) {
    135       delete sdd;
    136       return NULL;
    137     }
    138 
    139     sDisassemblers[key] = sdd;
    140 
    141     return sdd;
    142   }
    143 
    144   return NULL;
    145 }
    146 
    147 EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
    148                                                 AssemblySyntax syntax) {
    149   return getDisassembler(Triple(str).getArch(), syntax);
    150 }
    151 
    152 EDDisassembler::EDDisassembler(CPUKey &key) :
    153   Valid(false),
    154   HasSemantics(false),
    155   ErrorStream(nulls()),
    156   Key(key) {
    157   const char *triple = tripleFromArch(key.Arch);
    158 
    159   if (!triple)
    160     return;
    161 
    162   LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
    163 
    164   if (LLVMSyntaxVariant < 0)
    165     return;
    166 
    167   std::string tripleString(triple);
    168   std::string errorString;
    169 
    170   Tgt = TargetRegistry::lookupTarget(tripleString,
    171                                      errorString);
    172 
    173   if (!Tgt)
    174     return;
    175 
    176   std::string CPU;
    177   std::string featureString;
    178   TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
    179                                                featureString));
    180 
    181   const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
    182 
    183   if (!registerInfo)
    184     return;
    185 
    186   initMaps(*registerInfo);
    187 
    188   AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
    189 
    190   if (!AsmInfo)
    191     return;
    192 
    193   MRI.reset(Tgt->createMCRegInfo(tripleString));
    194 
    195   if (!MRI)
    196     return;
    197 
    198   Disassembler.reset(Tgt->createMCDisassembler());
    199 
    200   if (!Disassembler)
    201     return;
    202 
    203   InstInfos = Disassembler->getEDInfo();
    204 
    205   InstString.reset(new std::string);
    206   InstStream.reset(new raw_string_ostream(*InstString));
    207   InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
    208 
    209   if (!InstPrinter)
    210     return;
    211 
    212   GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
    213   SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
    214   SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
    215 
    216   initMaps(*TargetMachine->getRegisterInfo());
    217 
    218   Valid = true;
    219 }
    220 
    221 EDDisassembler::~EDDisassembler() {
    222   if (!valid())
    223     return;
    224 }
    225 
    226 namespace {
    227   /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
    228   ///   as provided by the sd interface.  See MemoryObject.
    229   class EDMemoryObject : public llvm::MemoryObject {
    230   private:
    231     EDByteReaderCallback Callback;
    232     void *Arg;
    233   public:
    234     EDMemoryObject(EDByteReaderCallback callback,
    235                    void *arg) : Callback(callback), Arg(arg) { }
    236     ~EDMemoryObject() { }
    237     uint64_t getBase() const { return 0x0; }
    238     uint64_t getExtent() const { return (uint64_t)-1; }
    239     int readByte(uint64_t address, uint8_t *ptr) const {
    240       if (!Callback)
    241         return -1;
    242 
    243       if (Callback(ptr, address, Arg))
    244         return -1;
    245 
    246       return 0;
    247     }
    248   };
    249 }
    250 
    251 EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
    252                                    uint64_t address,
    253                                    void *arg) {
    254   EDMemoryObject memoryObject(byteReader, arg);
    255 
    256   MCInst* inst = new MCInst;
    257   uint64_t byteSize;
    258 
    259   if (!Disassembler->getInstruction(*inst,
    260                                     byteSize,
    261                                     memoryObject,
    262                                     address,
    263                                     ErrorStream)) {
    264     delete inst;
    265     return NULL;
    266   } else {
    267     const llvm::EDInstInfo *thisInstInfo = NULL;
    268 
    269     if (InstInfos) {
    270       thisInstInfo = &InstInfos[inst->getOpcode()];
    271     }
    272 
    273     EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
    274     return sdInst;
    275   }
    276 }
    277 
    278 void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
    279   unsigned numRegisters = registerInfo.getNumRegs();
    280   unsigned registerIndex;
    281 
    282   for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
    283     const char* registerName = registerInfo.get(registerIndex).Name;
    284 
    285     RegVec.push_back(registerName);
    286     RegRMap[registerName] = registerIndex;
    287   }
    288 
    289   switch (Key.Arch) {
    290   default:
    291     break;
    292   case Triple::x86:
    293   case Triple::x86_64:
    294     stackPointers.insert(registerIDWithName("SP"));
    295     stackPointers.insert(registerIDWithName("ESP"));
    296     stackPointers.insert(registerIDWithName("RSP"));
    297 
    298     programCounters.insert(registerIDWithName("IP"));
    299     programCounters.insert(registerIDWithName("EIP"));
    300     programCounters.insert(registerIDWithName("RIP"));
    301     break;
    302   case Triple::arm:
    303   case Triple::thumb:
    304     stackPointers.insert(registerIDWithName("SP"));
    305 
    306     programCounters.insert(registerIDWithName("PC"));
    307     break;
    308   }
    309 }
    310 
    311 const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
    312   if (registerID >= RegVec.size())
    313     return NULL;
    314   else
    315     return RegVec[registerID].c_str();
    316 }
    317 
    318 unsigned EDDisassembler::registerIDWithName(const char *name) const {
    319   regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
    320   if (iter == RegRMap.end())
    321     return 0;
    322   else
    323     return (*iter).second;
    324 }
    325 
    326 bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
    327   return (stackPointers.find(registerID) != stackPointers.end());
    328 }
    329 
    330 bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
    331   return (programCounters.find(registerID) != programCounters.end());
    332 }
    333 
    334 int EDDisassembler::printInst(std::string &str, MCInst &inst) {
    335   PrinterMutex.acquire();
    336 
    337   InstPrinter->printInst(&inst, *InstStream);
    338   InstStream->flush();
    339   str = *InstString;
    340   InstString->clear();
    341 
    342   PrinterMutex.release();
    343 
    344   return 0;
    345 }
    346 
    347 static void diag_handler(const SMDiagnostic &diag,
    348                          void *context)
    349 {
    350   if (context) {
    351     EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
    352     diag.Print("", disassembler->ErrorStream);
    353   }
    354 }
    355 
    356 int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
    357                               SmallVectorImpl<AsmToken> &tokens,
    358                               const std::string &str) {
    359   int ret = 0;
    360 
    361   switch (Key.Arch) {
    362   default:
    363     return -1;
    364   case Triple::x86:
    365   case Triple::x86_64:
    366   case Triple::arm:
    367   case Triple::thumb:
    368     break;
    369   }
    370 
    371   const char *cStr = str.c_str();
    372   MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
    373 
    374   StringRef instName;
    375   SMLoc instLoc;
    376 
    377   SourceMgr sourceMgr;
    378   sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
    379   sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
    380   MCContext context(*AsmInfo, *MRI, NULL, NULL);
    381   OwningPtr<MCStreamer> streamer(createNullStreamer(context));
    382   OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
    383                                                          context, *streamer,
    384                                                          *AsmInfo));
    385 
    386   StringRef triple = tripleFromArch(Key.Arch);
    387   OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
    388   OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
    389                                                                *genericParser));
    390 
    391   AsmToken OpcodeToken = genericParser->Lex();
    392   AsmToken NextToken = genericParser->Lex();  // consume next token, because specificParser expects us to
    393 
    394   if (OpcodeToken.is(AsmToken::Identifier)) {
    395     instName = OpcodeToken.getString();
    396     instLoc = OpcodeToken.getLoc();
    397 
    398     if (NextToken.isNot(AsmToken::Eof) &&
    399         TargetParser->ParseInstruction(instName, instLoc, operands))
    400       ret = -1;
    401   } else {
    402     ret = -1;
    403   }
    404 
    405   ParserMutex.acquire();
    406 
    407   if (!ret) {
    408     GenericAsmLexer->setBuffer(buf);
    409 
    410     while (SpecificAsmLexer->Lex(),
    411            SpecificAsmLexer->isNot(AsmToken::Eof) &&
    412            SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
    413       if (SpecificAsmLexer->is(AsmToken::Error)) {
    414         ret = -1;
    415         break;
    416       }
    417       tokens.push_back(SpecificAsmLexer->getTok());
    418     }
    419   }
    420 
    421   ParserMutex.release();
    422 
    423   return ret;
    424 }
    425 
    426 int EDDisassembler::llvmSyntaxVariant() const {
    427   return LLVMSyntaxVariant;
    428 }
    429