Home | History | Annotate | Download | only in llvm-mc
      1 //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This class implements the disassembler of strings of bytes written in
     11 // hexadecimal, from standard input or from a file.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "Disassembler.h"
     16 #include "llvm/ADT/Triple.h"
     17 #include "llvm/MC/MCAsmInfo.h"
     18 #include "llvm/MC/MCContext.h"
     19 #include "llvm/MC/MCDisassembler.h"
     20 #include "llvm/MC/MCInst.h"
     21 #include "llvm/MC/MCRegisterInfo.h"
     22 #include "llvm/MC/MCStreamer.h"
     23 #include "llvm/MC/MCSubtargetInfo.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/MemoryObject.h"
     26 #include "llvm/Support/SourceMgr.h"
     27 #include "llvm/Support/TargetRegistry.h"
     28 #include "llvm/Support/raw_ostream.h"
     29 
     30 using namespace llvm;
     31 
     32 typedef std::vector<std::pair<unsigned char, const char*> > ByteArrayTy;
     33 
     34 namespace {
     35 class VectorMemoryObject : public MemoryObject {
     36 private:
     37   const ByteArrayTy &Bytes;
     38 public:
     39   VectorMemoryObject(const ByteArrayTy &bytes) : Bytes(bytes) {}
     40 
     41   uint64_t getBase() const override { return 0; }
     42   uint64_t getExtent() const override { return Bytes.size(); }
     43 
     44   int readByte(uint64_t Addr, uint8_t *Byte) const override {
     45     if (Addr >= getExtent())
     46       return -1;
     47     *Byte = Bytes[Addr].first;
     48     return 0;
     49   }
     50 };
     51 }
     52 
     53 static bool PrintInsts(const MCDisassembler &DisAsm,
     54                        const ByteArrayTy &Bytes,
     55                        SourceMgr &SM, raw_ostream &Out,
     56                        MCStreamer &Streamer, bool InAtomicBlock,
     57                        const MCSubtargetInfo &STI) {
     58   // Wrap the vector in a MemoryObject.
     59   VectorMemoryObject memoryObject(Bytes);
     60 
     61   // Disassemble it to strings.
     62   uint64_t Size;
     63   uint64_t Index;
     64 
     65   for (Index = 0; Index < Bytes.size(); Index += Size) {
     66     MCInst Inst;
     67 
     68     MCDisassembler::DecodeStatus S;
     69     S = DisAsm.getInstruction(Inst, Size, memoryObject, Index,
     70                               /*REMOVE*/ nulls(), nulls());
     71     switch (S) {
     72     case MCDisassembler::Fail:
     73       SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
     74                       SourceMgr::DK_Warning,
     75                       "invalid instruction encoding");
     76       // Don't try to resynchronise the stream in a block
     77       if (InAtomicBlock)
     78         return true;
     79 
     80       if (Size == 0)
     81         Size = 1; // skip illegible bytes
     82 
     83       break;
     84 
     85     case MCDisassembler::SoftFail:
     86       SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
     87                       SourceMgr::DK_Warning,
     88                       "potentially undefined instruction encoding");
     89       // Fall through
     90 
     91     case MCDisassembler::Success:
     92       Streamer.EmitInstruction(Inst, STI);
     93       break;
     94     }
     95   }
     96 
     97   return false;
     98 }
     99 
    100 static bool SkipToToken(StringRef &Str) {
    101   while (!Str.empty() && Str.find_first_not_of(" \t\r\n#,") != 0) {
    102     // Strip horizontal whitespace and commas.
    103     if (size_t Pos = Str.find_first_not_of(" \t\r,")) {
    104       Str = Str.substr(Pos);
    105     }
    106 
    107     // If this is the end of a line or start of a comment, remove the rest of
    108     // the line.
    109     if (Str[0] == '\n' || Str[0] == '#') {
    110       // Strip to the end of line if we already processed any bytes on this
    111       // line.  This strips the comment and/or the \n.
    112       if (Str[0] == '\n') {
    113         Str = Str.substr(1);
    114       } else {
    115         Str = Str.substr(Str.find_first_of('\n'));
    116         if (!Str.empty())
    117           Str = Str.substr(1);
    118       }
    119       continue;
    120     }
    121   }
    122 
    123   return !Str.empty();
    124 }
    125 
    126 
    127 static bool ByteArrayFromString(ByteArrayTy &ByteArray,
    128                                 StringRef &Str,
    129                                 SourceMgr &SM) {
    130   while (SkipToToken(Str)) {
    131     // Handled by higher level
    132     if (Str[0] == '[' || Str[0] == ']')
    133       return false;
    134 
    135     // Get the current token.
    136     size_t Next = Str.find_first_of(" \t\n\r,#[]");
    137     StringRef Value = Str.substr(0, Next);
    138 
    139     // Convert to a byte and add to the byte vector.
    140     unsigned ByteVal;
    141     if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
    142       // If we have an error, print it and skip to the end of line.
    143       SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
    144                       "invalid input token");
    145       Str = Str.substr(Str.find('\n'));
    146       ByteArray.clear();
    147       continue;
    148     }
    149 
    150     ByteArray.push_back(std::make_pair((unsigned char)ByteVal, Value.data()));
    151     Str = Str.substr(Next);
    152   }
    153 
    154   return false;
    155 }
    156 
    157 int Disassembler::disassemble(const Target &T,
    158                               const std::string &Triple,
    159                               MCSubtargetInfo &STI,
    160                               MCStreamer &Streamer,
    161                               MemoryBuffer &Buffer,
    162                               SourceMgr &SM,
    163                               raw_ostream &Out) {
    164 
    165   std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
    166   if (!MRI) {
    167     errs() << "error: no register info for target " << Triple << "\n";
    168     return -1;
    169   }
    170 
    171   std::unique_ptr<const MCAsmInfo> MAI(T.createMCAsmInfo(*MRI, Triple));
    172   if (!MAI) {
    173     errs() << "error: no assembly info for target " << Triple << "\n";
    174     return -1;
    175   }
    176 
    177   // Set up the MCContext for creating symbols and MCExpr's.
    178   MCContext Ctx(MAI.get(), MRI.get(), nullptr);
    179 
    180   std::unique_ptr<const MCDisassembler> DisAsm(
    181     T.createMCDisassembler(STI, Ctx));
    182   if (!DisAsm) {
    183     errs() << "error: no disassembler for target " << Triple << "\n";
    184     return -1;
    185   }
    186 
    187   // Set up initial section manually here
    188   Streamer.InitSections();
    189 
    190   bool ErrorOccurred = false;
    191 
    192   // Convert the input to a vector for disassembly.
    193   ByteArrayTy ByteArray;
    194   StringRef Str = Buffer.getBuffer();
    195   bool InAtomicBlock = false;
    196 
    197   while (SkipToToken(Str)) {
    198     ByteArray.clear();
    199 
    200     if (Str[0] == '[') {
    201       if (InAtomicBlock) {
    202         SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
    203                         "nested atomic blocks make no sense");
    204         ErrorOccurred = true;
    205       }
    206       InAtomicBlock = true;
    207       Str = Str.drop_front();
    208       continue;
    209     } else if (Str[0] == ']') {
    210       if (!InAtomicBlock) {
    211         SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
    212                         "attempt to close atomic block without opening");
    213         ErrorOccurred = true;
    214       }
    215       InAtomicBlock = false;
    216       Str = Str.drop_front();
    217       continue;
    218     }
    219 
    220     // It's a real token, get the bytes and emit them
    221     ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
    222 
    223     if (!ByteArray.empty())
    224       ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer,
    225                                   InAtomicBlock, STI);
    226   }
    227 
    228   if (InAtomicBlock) {
    229     SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
    230                     "unclosed atomic block");
    231     ErrorOccurred = true;
    232   }
    233 
    234   return ErrorOccurred;
    235 }
    236