Home | History | Annotate | Download | only in llvm-mc
      1 //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This class implements the disassembler of strings of bytes written in
     11 // hexadecimal, from standard input or from a file.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "Disassembler.h"
     16 #include "../../lib/MC/MCDisassembler/EDDisassembler.h"
     17 #include "../../lib/MC/MCDisassembler/EDInst.h"
     18 #include "../../lib/MC/MCDisassembler/EDOperand.h"
     19 #include "../../lib/MC/MCDisassembler/EDToken.h"
     20 #include "llvm/MC/MCDisassembler.h"
     21 #include "llvm/MC/MCInst.h"
     22 #include "llvm/MC/MCStreamer.h"
     23 #include "llvm/MC/MCSubtargetInfo.h"
     24 #include "llvm/ADT/OwningPtr.h"
     25 #include "llvm/ADT/Triple.h"
     26 #include "llvm/Support/MemoryBuffer.h"
     27 #include "llvm/Support/MemoryObject.h"
     28 #include "llvm/Support/SourceMgr.h"
     29 #include "llvm/Support/TargetRegistry.h"
     30 #include "llvm/Support/raw_ostream.h"
     31 
     32 using namespace llvm;
     33 
     34 typedef std::vector<std::pair<unsigned char, const char*> > ByteArrayTy;
     35 
     36 namespace {
     37 class VectorMemoryObject : public MemoryObject {
     38 private:
     39   const ByteArrayTy &Bytes;
     40 public:
     41   VectorMemoryObject(const ByteArrayTy &bytes) : Bytes(bytes) {}
     42 
     43   uint64_t getBase() const { return 0; }
     44   uint64_t getExtent() const { return Bytes.size(); }
     45 
     46   int readByte(uint64_t Addr, uint8_t *Byte) const {
     47     if (Addr >= getExtent())
     48       return -1;
     49     *Byte = Bytes[Addr].first;
     50     return 0;
     51   }
     52 };
     53 }
     54 
     55 static bool PrintInsts(const MCDisassembler &DisAsm,
     56                        const ByteArrayTy &Bytes,
     57                        SourceMgr &SM, raw_ostream &Out,
     58                        MCStreamer &Streamer) {
     59   // Wrap the vector in a MemoryObject.
     60   VectorMemoryObject memoryObject(Bytes);
     61 
     62   // Disassemble it to strings.
     63   uint64_t Size;
     64   uint64_t Index;
     65 
     66   for (Index = 0; Index < Bytes.size(); Index += Size) {
     67     MCInst Inst;
     68 
     69     MCDisassembler::DecodeStatus S;
     70     S = DisAsm.getInstruction(Inst, Size, memoryObject, Index,
     71                               /*REMOVE*/ nulls(), nulls());
     72     switch (S) {
     73     case MCDisassembler::Fail:
     74       SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
     75                       SourceMgr::DK_Warning,
     76                       "invalid instruction encoding");
     77       if (Size == 0)
     78         Size = 1; // skip illegible bytes
     79       break;
     80 
     81     case MCDisassembler::SoftFail:
     82       SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
     83                       SourceMgr::DK_Warning,
     84                       "potentially undefined instruction encoding");
     85       // Fall through
     86 
     87     case MCDisassembler::Success:
     88       Streamer.EmitInstruction(Inst);
     89       break;
     90     }
     91   }
     92 
     93   return false;
     94 }
     95 
     96 static bool ByteArrayFromString(ByteArrayTy &ByteArray,
     97                                 StringRef &Str,
     98                                 SourceMgr &SM) {
     99   while (!Str.empty()) {
    100     // Strip horizontal whitespace.
    101     if (size_t Pos = Str.find_first_not_of(" \t\r")) {
    102       Str = Str.substr(Pos);
    103       continue;
    104     }
    105 
    106     // If this is the end of a line or start of a comment, remove the rest of
    107     // the line.
    108     if (Str[0] == '\n' || Str[0] == '#') {
    109       // Strip to the end of line if we already processed any bytes on this
    110       // line.  This strips the comment and/or the \n.
    111       if (Str[0] == '\n') {
    112         Str = Str.substr(1);
    113       } else {
    114         Str = Str.substr(Str.find_first_of('\n'));
    115         if (!Str.empty())
    116           Str = Str.substr(1);
    117       }
    118       continue;
    119     }
    120 
    121     // Get the current token.
    122     size_t Next = Str.find_first_of(" \t\n\r#");
    123     StringRef Value = Str.substr(0, Next);
    124 
    125     // Convert to a byte and add to the byte vector.
    126     unsigned ByteVal;
    127     if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
    128       // If we have an error, print it and skip to the end of line.
    129       SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
    130                       "invalid input token");
    131       Str = Str.substr(Str.find('\n'));
    132       ByteArray.clear();
    133       continue;
    134     }
    135 
    136     ByteArray.push_back(std::make_pair((unsigned char)ByteVal, Value.data()));
    137     Str = Str.substr(Next);
    138   }
    139 
    140   return false;
    141 }
    142 
    143 int Disassembler::disassemble(const Target &T,
    144                               const std::string &Triple,
    145                               MCSubtargetInfo &STI,
    146                               MCStreamer &Streamer,
    147                               MemoryBuffer &Buffer,
    148                               SourceMgr &SM,
    149                               raw_ostream &Out) {
    150   OwningPtr<const MCDisassembler> DisAsm(T.createMCDisassembler(STI));
    151   if (!DisAsm) {
    152     errs() << "error: no disassembler for target " << Triple << "\n";
    153     return -1;
    154   }
    155 
    156   // Set up initial section manually here
    157   Streamer.InitSections();
    158 
    159   bool ErrorOccurred = false;
    160 
    161   // Convert the input to a vector for disassembly.
    162   ByteArrayTy ByteArray;
    163   StringRef Str = Buffer.getBuffer();
    164 
    165   ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
    166 
    167   if (!ByteArray.empty())
    168     ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer);
    169 
    170   return ErrorOccurred;
    171 }
    172 
    173 static int byteArrayReader(uint8_t *B, uint64_t A, void *Arg) {
    174   ByteArrayTy &ByteArray = *((ByteArrayTy*)Arg);
    175 
    176   if (A >= ByteArray.size())
    177     return -1;
    178 
    179   *B = ByteArray[A].first;
    180 
    181   return 0;
    182 }
    183 
    184 static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) {
    185   EDDisassembler &disassembler = *(EDDisassembler *)((void **)Arg)[0];
    186   raw_ostream &Out = *(raw_ostream *)((void **)Arg)[1];
    187 
    188   if (const char *regName = disassembler.nameWithRegisterID(R))
    189     Out << "[" << regName << "/" << R << "]";
    190 
    191   if (disassembler.registerIsStackPointer(R))
    192     Out << "(sp)";
    193   if (disassembler.registerIsProgramCounter(R))
    194     Out << "(pc)";
    195 
    196   *V = 0;
    197   return 0;
    198 }
    199 
    200 int Disassembler::disassembleEnhanced(const std::string &TS,
    201                                       MemoryBuffer &Buffer,
    202                                       SourceMgr &SM,
    203                                       raw_ostream &Out) {
    204   ByteArrayTy ByteArray;
    205   StringRef Str = Buffer.getBuffer();
    206 
    207   if (ByteArrayFromString(ByteArray, Str, SM)) {
    208     return -1;
    209   }
    210 
    211   Triple T(TS);
    212   EDDisassembler::AssemblySyntax AS;
    213 
    214   switch (T.getArch()) {
    215   default:
    216     errs() << "error: no default assembly syntax for " << TS.c_str() << "\n";
    217     return -1;
    218   case Triple::arm:
    219   case Triple::thumb:
    220     AS = EDDisassembler::kEDAssemblySyntaxARMUAL;
    221     break;
    222   case Triple::x86:
    223   case Triple::x86_64:
    224     AS = EDDisassembler::kEDAssemblySyntaxX86ATT;
    225     break;
    226   }
    227 
    228   OwningPtr<EDDisassembler>
    229     disassembler(EDDisassembler::getDisassembler(TS.c_str(), AS));
    230 
    231   if (disassembler == 0) {
    232     errs() << "error: couldn't get disassembler for " << TS << '\n';
    233     return -1;
    234   }
    235 
    236   while (ByteArray.size()) {
    237     OwningPtr<EDInst>
    238       inst(disassembler->createInst(byteArrayReader, 0, &ByteArray));
    239 
    240     if (inst == 0) {
    241       errs() << "error: Didn't get an instruction\n";
    242       return -1;
    243     }
    244 
    245     ByteArray.erase (ByteArray.begin(), ByteArray.begin() + inst->byteSize());
    246 
    247     unsigned numTokens = inst->numTokens();
    248     if ((int)numTokens < 0) {
    249       errs() << "error: couldn't count the instruction's tokens\n";
    250       return -1;
    251     }
    252 
    253     for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) {
    254       EDToken *token;
    255 
    256       if (inst->getToken(token, tokenIndex)) {
    257         errs() << "error: Couldn't get token\n";
    258         return -1;
    259       }
    260 
    261       const char *buf;
    262       if (token->getString(buf)) {
    263         errs() << "error: Couldn't get string for token\n";
    264         return -1;
    265       }
    266 
    267       Out << '[';
    268       int operandIndex = token->operandID();
    269 
    270       if (operandIndex >= 0)
    271         Out << operandIndex << "-";
    272 
    273       switch (token->type()) {
    274       case EDToken::kTokenWhitespace: Out << "w"; break;
    275       case EDToken::kTokenPunctuation: Out << "p"; break;
    276       case EDToken::kTokenOpcode: Out << "o"; break;
    277       case EDToken::kTokenLiteral: Out << "l"; break;
    278       case EDToken::kTokenRegister: Out << "r"; break;
    279       }
    280 
    281       Out << ":" << buf;
    282 
    283       if (token->type() == EDToken::kTokenLiteral) {
    284         Out << "=";
    285         if (token->literalSign())
    286           Out << "-";
    287         uint64_t absoluteValue;
    288         if (token->literalAbsoluteValue(absoluteValue)) {
    289           errs() << "error: Couldn't get the value of a literal token\n";
    290           return -1;
    291         }
    292         Out << absoluteValue;
    293       } else if (token->type() == EDToken::kTokenRegister) {
    294         Out << "=";
    295         unsigned regID;
    296         if (token->registerID(regID)) {
    297           errs() << "error: Couldn't get the ID of a register token\n";
    298           return -1;
    299         }
    300         Out << "r" << regID;
    301       }
    302 
    303       Out << "]";
    304     }
    305 
    306     Out << " ";
    307 
    308     if (inst->isBranch())
    309       Out << "<br> ";
    310     if (inst->isMove())
    311       Out << "<mov> ";
    312 
    313     unsigned numOperands = inst->numOperands();
    314 
    315     if ((int)numOperands < 0) {
    316       errs() << "error: Couldn't count operands\n";
    317       return -1;
    318     }
    319 
    320     for (unsigned operandIndex = 0; operandIndex != numOperands;
    321          ++operandIndex) {
    322       Out << operandIndex << ":";
    323 
    324       EDOperand *operand;
    325       if (inst->getOperand(operand, operandIndex)) {
    326         errs() << "error: couldn't get operand\n";
    327         return -1;
    328       }
    329 
    330       uint64_t evaluatedResult;
    331       void *Arg[] = { disassembler.get(), &Out };
    332       if (operand->evaluate(evaluatedResult, verboseEvaluator, Arg)) {
    333         errs() << "error: Couldn't evaluate an operand\n";
    334         return -1;
    335       }
    336       Out << "=" << evaluatedResult << " ";
    337     }
    338 
    339     Out << '\n';
    340   }
    341 
    342   return 0;
    343 }
    344