Home | History | Annotate | Download | only in llvm-mcmarkup
      1 //===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Example simple parser implementation for the MC assembly markup language.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/ADT/OwningPtr.h"
     15 #include "llvm/Support/CommandLine.h"
     16 #include "llvm/Support/Format.h"
     17 #include "llvm/Support/ManagedStatic.h"
     18 #include "llvm/Support/MemoryBuffer.h"
     19 #include "llvm/Support/PrettyStackTrace.h"
     20 #include "llvm/Support/Signals.h"
     21 #include "llvm/Support/SourceMgr.h"
     22 #include "llvm/Support/raw_ostream.h"
     23 #include "llvm/Support/system_error.h"
     24 using namespace llvm;
     25 
     26 static cl::list<std::string>
     27        InputFilenames(cl::Positional, cl::desc("<input files>"),
     28                       cl::ZeroOrMore);
     29 static cl::opt<bool>
     30 DumpTags("dump-tags", cl::desc("List all tags encountered in input"));
     31 
     32 static StringRef ToolName;
     33 
     34 /// Trivial lexer for the markup parser. Input is always handled a character
     35 /// at a time. The lexer just encapsulates EOF and lookahead handling.
     36 class MarkupLexer {
     37   StringRef::const_iterator Start;
     38   StringRef::const_iterator CurPtr;
     39   StringRef::const_iterator End;
     40 public:
     41   MarkupLexer(StringRef Source)
     42     : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
     43   // When processing non-markup, input is consumed a character at a time.
     44   bool isEOF() { return CurPtr == End; }
     45   int getNextChar() {
     46     if (CurPtr == End) return EOF;
     47     return *CurPtr++;
     48   }
     49   int peekNextChar() {
     50     if (CurPtr == End) return EOF;
     51     return *CurPtr;
     52   }
     53   StringRef::const_iterator getPosition() const { return CurPtr; }
     54 };
     55 
     56 /// A markup tag is a name and a (usually empty) list of modifiers.
     57 class MarkupTag {
     58   StringRef Name;
     59   StringRef Modifiers;
     60   SMLoc StartLoc;
     61 public:
     62   MarkupTag(StringRef n, StringRef m, SMLoc Loc)
     63     : Name(n), Modifiers(m), StartLoc(Loc) {}
     64   StringRef getName() const { return Name; }
     65   StringRef getModifiers() const { return Modifiers; }
     66   SMLoc getLoc() const { return StartLoc; }
     67 };
     68 
     69 /// A simple parser implementation for creating MarkupTags from input text.
     70 class MarkupParser {
     71   MarkupLexer &Lex;
     72   SourceMgr &SM;
     73 public:
     74   MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
     75   /// Create a MarkupTag from the current position in the MarkupLexer.
     76   /// The parseTag() method should be called when the lexer has processed
     77   /// the opening '<' character. Input will be consumed up to and including
     78   /// the ':' which terminates the tag open.
     79   MarkupTag parseTag();
     80   /// Issue a diagnostic and terminate program execution.
     81   void FatalError(SMLoc Loc, StringRef Msg);
     82 };
     83 
     84 void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
     85   SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
     86   exit(1);
     87 }
     88 
     89 // Example handler for when a tag is recognized.
     90 static void processStartTag(MarkupTag &Tag) {
     91   // If we're just printing the tags, do that, otherwise do some simple
     92   // colorization.
     93   if (DumpTags) {
     94     outs() << Tag.getName();
     95     if (Tag.getModifiers().size())
     96       outs() << " " << Tag.getModifiers();
     97     outs() << "\n";
     98     return;
     99   }
    100 
    101   if (!outs().has_colors())
    102     return;
    103   // Color registers as red and immediates as cyan. Those don't have nested
    104   // tags, so don't bother keeping a stack of colors to reset to.
    105   if (Tag.getName() == "reg")
    106     outs().changeColor(raw_ostream::RED);
    107   else if (Tag.getName() == "imm")
    108     outs().changeColor(raw_ostream::CYAN);
    109 }
    110 
    111 // Example handler for when the end of a tag is recognized.
    112 static void processEndTag(MarkupTag &Tag) {
    113   // If we're printing the tags, there's nothing more to do here. Otherwise,
    114   // set the color back the normal.
    115   if (DumpTags)
    116     return;
    117   if (!outs().has_colors())
    118     return;
    119   // Just reset to basic white.
    120   outs().changeColor(raw_ostream::WHITE, false);
    121 }
    122 
    123 MarkupTag MarkupParser::parseTag() {
    124   // First off, extract the tag into it's own StringRef so we can look at it
    125   // outside of the context of consuming input.
    126   StringRef::const_iterator Start = Lex.getPosition();
    127   SMLoc Loc = SMLoc::getFromPointer(Start - 1);
    128   while(Lex.getNextChar() != ':') {
    129     // EOF is an error.
    130     if (Lex.isEOF())
    131       FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
    132   }
    133   StringRef RawTag(Start, Lex.getPosition() - Start - 1);
    134   std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
    135   return MarkupTag(SplitTag.first, SplitTag.second, Loc);
    136 }
    137 
    138 static void parseMCMarkup(StringRef Filename) {
    139   OwningPtr<MemoryBuffer> BufferPtr;
    140   if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) {
    141     errs() << ToolName << ": " << ec.message() << '\n';
    142     return;
    143   }
    144   MemoryBuffer *Buffer = BufferPtr.take();
    145 
    146   SourceMgr SrcMgr;
    147 
    148   // Tell SrcMgr about this buffer, which is what the parser will pick up.
    149   SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
    150 
    151   StringRef InputSource = Buffer->getBuffer();
    152   MarkupLexer Lex(InputSource);
    153   MarkupParser Parser(Lex, SrcMgr);
    154 
    155   SmallVector<MarkupTag, 4> TagStack;
    156 
    157   for (int CurChar = Lex.getNextChar();
    158        CurChar != EOF;
    159        CurChar = Lex.getNextChar()) {
    160     switch (CurChar) {
    161     case '<': {
    162       // A "<<" is output as a literal '<' and does not start a markup tag.
    163       if (Lex.peekNextChar() == '<') {
    164         (void)Lex.getNextChar();
    165         break;
    166       }
    167       // Parse the markup entry.
    168       TagStack.push_back(Parser.parseTag());
    169 
    170       // Do any special handling for the start of a tag.
    171       processStartTag(TagStack.back());
    172       continue;
    173     }
    174     case '>': {
    175       SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
    176       // A ">>" is output as a literal '>' and does not end a markup tag.
    177       if (Lex.peekNextChar() == '>') {
    178         (void)Lex.getNextChar();
    179         break;
    180       }
    181       // Close out the innermost tag.
    182       if (TagStack.empty())
    183         Parser.FatalError(Loc, "'>' without matching '<'");
    184 
    185       // Do any special handling for the end of a tag.
    186       processEndTag(TagStack.back());
    187 
    188       TagStack.pop_back();
    189       continue;
    190     }
    191     default:
    192       break;
    193     }
    194     // For anything else, just echo the character back out.
    195     if (!DumpTags && CurChar != EOF)
    196       outs() << (char)CurChar;
    197   }
    198 
    199   // If there are any unterminated markup tags, issue diagnostics for them.
    200   while (!TagStack.empty()) {
    201     MarkupTag &Tag = TagStack.back();
    202     SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
    203                         "unterminated markup tag");
    204     TagStack.pop_back();
    205   }
    206 }
    207 
    208 int main(int argc, char **argv) {
    209   // Print a stack trace if we signal out.
    210   sys::PrintStackTraceOnErrorSignal();
    211   PrettyStackTraceProgram X(argc, argv);
    212 
    213   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
    214   cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
    215 
    216   ToolName = argv[0];
    217 
    218   // If no input files specified, read from stdin.
    219   if (InputFilenames.size() == 0)
    220     InputFilenames.push_back("-");
    221 
    222   std::for_each(InputFilenames.begin(), InputFilenames.end(),
    223                 parseMCMarkup);
    224   return 0;
    225 }
    226