Home | History | Annotate | Download | only in MCDisassembler
      1 //===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the interface for the Enhanced Disassembly library's
     11 // disassembler class.  The disassembler is responsible for vending individual
     12 // instructions according to a given architecture and disassembly syntax.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #ifndef LLVM_EDDISASSEMBLER_H
     17 #define LLVM_EDDISASSEMBLER_H
     18 
     19 #include "EDInfo.h"
     20 
     21 #include "llvm/ADT/OwningPtr.h"
     22 #include "llvm/ADT/Triple.h"
     23 #include "llvm/Support/raw_ostream.h"
     24 #include "llvm/Support/Mutex.h"
     25 
     26 #include <map>
     27 #include <set>
     28 #include <vector>
     29 
     30 namespace llvm {
     31 class AsmLexer;
     32 class AsmParser;
     33 class AsmToken;
     34 class MCContext;
     35 class MCAsmInfo;
     36 class MCAsmLexer;
     37 class MCDisassembler;
     38 class MCInstPrinter;
     39 class MCInst;
     40 class MCParsedAsmOperand;
     41 class MCRegisterInfo;
     42 class MCStreamer;
     43 class MCSubtargetInfo;
     44 template <typename T> class SmallVectorImpl;
     45 class SourceMgr;
     46 class Target;
     47 class TargetAsmLexer;
     48 class TargetAsmParser;
     49 class TargetMachine;
     50 class TargetRegisterInfo;
     51 
     52 struct EDInstInfo;
     53 struct EDInst;
     54 struct EDOperand;
     55 struct EDToken;
     56 
        /// EDByteReaderCallback - Function-pointer type of the byte reader passed to
        ///   EDDisassembler::createInst.  It receives a uint8_t pointer (presumably
        ///   where the fetched byte is stored), the address to read, and an opaque
        ///   argument, and returns an int.
        ///   NOTE(review): the meaning of the int result is not visible in this
        ///   header -- confirm against the implementation.
     57 typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
     58 
     59 /// EDDisassembler - Encapsulates a disassembler for a single architecture and
     60 ///   disassembly syntax.  Also manages the static disassembler registry.
     61 struct EDDisassembler {
     62   typedef enum {
     63     /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
     64     kEDAssemblySyntaxX86Intel  = 0,
     65     /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
     66     kEDAssemblySyntaxX86ATT    = 1,
            /*! @constant kEDAssemblySyntaxARMUAL ARM syntax (presumably UAL; confirm). */
     67     kEDAssemblySyntaxARMUAL    = 2
     68   } AssemblySyntax;
     69 
     70 
     71   ////////////////////
     72   // Static members //
     73   ////////////////////
     74 
     75   /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
     76   ///   pair
     77   struct CPUKey {
     78     /// The architecture type
     79     llvm::Triple::ArchType Arch;
     80 
     81     /// The assembly syntax
     82     AssemblySyntax Syntax;
     83 
     84     /// operator== - Equality operator; true when both Arch and Syntax match
     85     bool operator==(const CPUKey &key) const {
     86       return (Arch == key.Arch &&
     87               Syntax == key.Syntax);
     88     }
     89 
     90     /// operator< - Less-than operator; orders by Arch, then by Syntax
     91     bool operator<(const CPUKey &key) const {
     92       return ((Arch < key.Arch) ||
     93               ((Arch == key.Arch) && Syntax < (key.Syntax)));
     94     }
     95   };
     96 
          /// DisassemblerMap_t - Maps an architecture/syntax key to its disassembler
     97   typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
     98 
     99   /// True if the disassembler registry has been initialized; false if not
    100   static bool sInitialized;
    101   /// A map from disassembler specifications to disassemblers.  Populated
    102   ///   lazily.
    103   static DisassemblerMap_t sDisassemblers;
    104 
    105   /// getDisassembler - Returns the specified disassembler, or NULL on failure
    106   ///
    107   /// @arg arch   - The desired architecture
    108   /// @arg syntax - The desired disassembly syntax
    109   static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
    110                                          AssemblySyntax syntax);
    111 
    112   /// getDisassembler - Returns the disassembler for a given combination of
    113   ///   architecture triple string and assembly syntax, or NULL on failure
    114   ///
    115   /// @arg str    - The string representation of the architecture triple, e.g.,
    116   ///               "x86_64-apple-darwin"
    117   /// @arg syntax - The disassembly syntax for the required disassembler
    118   static EDDisassembler *getDisassembler(llvm::StringRef str,
    119                                          AssemblySyntax syntax);
    120 
    121   /// initialize - Initializes the disassembler registry and the LLVM backend
    122   static void initialize();
    123 
    124   ////////////////////////
    125   // Per-object members //
    126   ////////////////////////
    127 
    128   /// True only if the object has been successfully initialized
    129   bool Valid;
    130   /// True if the disassembler can provide semantic information
    131   bool HasSemantics;
    132 
    133   /// The stream to write errors to
    134   llvm::raw_ostream &ErrorStream;
    135 
    136   /// The architecture/syntax pair for the current architecture
    137   CPUKey Key;
    138   /// The LLVM target corresponding to the disassembler
    139   const llvm::Target *Tgt;
    140   /// The target machine instance.
    141   llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
    142   /// The assembly information for the target architecture
    143   llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
    144   /// The register information for the target architecture.
    145   llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
    146   /// The disassembler for the target architecture
    147   llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
    148   /// The output string for the instruction printer; must be guarded with
    149   ///   PrinterMutex
    150   llvm::OwningPtr<std::string> InstString;
    151   /// The output stream for the disassembler; must be guarded with
    152   ///   PrinterMutex
    153   llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
    154   /// The instruction printer for the target architecture; must be guarded with
    155   ///   PrinterMutex when printing
    156   llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
    157   /// The mutex that guards the instruction printer's printing functions, which
    158   ///   use a shared stream
    159   llvm::sys::Mutex PrinterMutex;
    160   /// The array of instruction information provided by the TableGen backend for
    161   ///   the target architecture
    162   const llvm::EDInstInfo *InstInfos;
    163   /// The lexers for use in tokenizing strings, in target-independent
    164   ///   (GenericAsmLexer) and target-specific (SpecificAsmLexer) portions
    165   llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
    166   llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
    167   /// The mutex that guards the lexers above
    168   llvm::sys::Mutex ParserMutex;
    169   /// The LLVM number used for the target disassembly syntax variant
    170   int LLVMSyntaxVariant;
    171 
    172   typedef std::vector<std::string> regvec_t;
    173   typedef std::map<std::string, unsigned> regrmap_t;
    174 
    175   /// A vector of registers for quick mapping from LLVM register IDs to names
    176   regvec_t RegVec;
    177   /// A map of registers for quick mapping from register names to LLVM IDs
    178   regrmap_t RegRMap;
    179 
    180   /// A set of register IDs for aliases of the stack pointer for the current
    181   ///   architecture
    182   std::set<unsigned> stackPointers;
    183   /// A set of register IDs for aliases of the program counter for the current
    184   ///   architecture
    185   std::set<unsigned> programCounters;
    186 
    187   /// Constructor - initializes a disassembler with all the necessary objects,
    188   ///   which come pre-allocated from the registry accessor function
    189   ///
    190   /// @arg key                - the architecture and disassembly syntax for the
    191   ///                           disassembler
    192   EDDisassembler(CPUKey& key);
    193 
    194   /// valid - returns Valid; false means there was a failure in the constructor.
    195   bool valid() {
    196     return Valid;
    197   }
    198 
    199   /// hasSemantics - reports whether the disassembler can provide operands and
    200   ///   tokens.
    201   bool hasSemantics() {
    202     return HasSemantics;
    203   }
    204 
    205   ~EDDisassembler();
    206 
    207   /// createInst - creates and returns an instruction given a callback and
    208   ///   memory address, or NULL on failure
    209   ///
    210   /// @arg byteReader - A callback function that provides machine code bytes
    211   /// @arg address    - The address of the first byte of the instruction,
    212   ///                   suitable for passing to byteReader
    213   /// @arg arg        - An opaque argument for byteReader
    214   EDInst *createInst(EDByteReaderCallback byteReader,
    215                      uint64_t address,
    216                      void *arg);
    217 
    218   /// initMaps - initializes RegVec and RegRMap using the provided register
    219   ///   info
    220   ///
    221   /// @arg registerInfo - the register information to use as a source
    222   void initMaps(const llvm::TargetRegisterInfo &registerInfo);
    223   /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
    224   ///   register for a given register ID, or NULL on failure
    225   ///
    226   /// @arg registerID - the ID of the register to be queried
    227   const char *nameWithRegisterID(unsigned registerID) const;
    228   /// registerIDWithName - Returns the ID of a register for a given register
    229   ///   name, or (unsigned)-1 on failure
    230   ///
    231   /// @arg name - The name of the register
    232   unsigned registerIDWithName(const char *name) const;
    233 
    234   /// registerIsStackPointer - reports whether a register ID is an alias for the
    235   ///   stack pointer register
    236   ///
    237   /// @arg registerID - The LLVM register ID
    238   bool registerIsStackPointer(unsigned registerID);
    239   /// registerIsProgramCounter - reports whether a register ID is an alias for
    240   ///   the program counter register
    241   ///
    242   /// @arg registerID - The LLVM register ID
    243   bool registerIsProgramCounter(unsigned registerID);
    244 
    245   /// printInst - prints an MCInst to a string, returning 0 on success, or -1
    246   ///   otherwise
    247   ///
    248   /// @arg str  - A reference to a string which is filled in with the string
    249   ///             representation of the instruction
    250   /// @arg inst - A reference to the MCInst to be printed
    251   int printInst(std::string& str,
    252                 llvm::MCInst& inst);
    253 
    254   /// parseInst - extracts operands and tokens from a string for use in
    255   ///   tokenizing the string.  Returns 0 on success, or -1 otherwise.
    256   ///
    257   /// @arg operands - A reference to a vector that will be filled in with the
    258   ///                 parsed operands
    259   /// @arg tokens   - A reference to a vector that will be filled in with the
    260   ///                 tokens
    261   /// @arg str      - The string representation of the instruction
    262   int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
    263                 llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
    264                 const std::string &str);
    265 
    266   /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
    267   int llvmSyntaxVariant() const;
    268 };
    269 
    270 } // end namespace llvm
    271 
    272 #endif
    273