Home | History | Annotate | Download | only in llvm-mcmarkup
      1 //===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Example simple parser implementation for the MC assembly markup language.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/Support/CommandLine.h"
     15 #include "llvm/Support/Format.h"
     16 #include "llvm/Support/ManagedStatic.h"
     17 #include "llvm/Support/MemoryBuffer.h"
     18 #include "llvm/Support/PrettyStackTrace.h"
     19 #include "llvm/Support/Signals.h"
     20 #include "llvm/Support/SourceMgr.h"
     21 #include "llvm/Support/raw_ostream.h"
     22 #include <system_error>
     23 using namespace llvm;
     24 
     25 static cl::list<std::string>
     26        InputFilenames(cl::Positional, cl::desc("<input files>"),
     27                       cl::ZeroOrMore);
     28 static cl::opt<bool>
     29 DumpTags("dump-tags", cl::desc("List all tags encountered in input"));
     30 
     31 static StringRef ToolName;
     32 
     33 /// Trivial lexer for the markup parser. Input is always handled a character
     34 /// at a time. The lexer just encapsulates EOF and lookahead handling.
     35 class MarkupLexer {
     36   StringRef::const_iterator Start;
     37   StringRef::const_iterator CurPtr;
     38   StringRef::const_iterator End;
     39 public:
     40   MarkupLexer(StringRef Source)
     41     : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
     42   // When processing non-markup, input is consumed a character at a time.
     43   bool isEOF() { return CurPtr == End; }
     44   int getNextChar() {
     45     if (CurPtr == End) return EOF;
     46     return *CurPtr++;
     47   }
     48   int peekNextChar() {
     49     if (CurPtr == End) return EOF;
     50     return *CurPtr;
     51   }
     52   StringRef::const_iterator getPosition() const { return CurPtr; }
     53 };
     54 
     55 /// A markup tag is a name and a (usually empty) list of modifiers.
     56 class MarkupTag {
     57   StringRef Name;
     58   StringRef Modifiers;
     59   SMLoc StartLoc;
     60 public:
     61   MarkupTag(StringRef n, StringRef m, SMLoc Loc)
     62     : Name(n), Modifiers(m), StartLoc(Loc) {}
     63   StringRef getName() const { return Name; }
     64   StringRef getModifiers() const { return Modifiers; }
     65   SMLoc getLoc() const { return StartLoc; }
     66 };
     67 
     68 /// A simple parser implementation for creating MarkupTags from input text.
     69 class MarkupParser {
     70   MarkupLexer &Lex;
     71   SourceMgr &SM;
     72 public:
     73   MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
     74   /// Create a MarkupTag from the current position in the MarkupLexer.
     75   /// The parseTag() method should be called when the lexer has processed
     76   /// the opening '<' character. Input will be consumed up to and including
     77   /// the ':' which terminates the tag open.
     78   MarkupTag parseTag();
     79   /// Issue a diagnostic and terminate program execution.
     80   void FatalError(SMLoc Loc, StringRef Msg);
     81 };
     82 
     83 void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
     84   SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
     85   exit(1);
     86 }
     87 
     88 // Example handler for when a tag is recognized.
     89 static void processStartTag(MarkupTag &Tag) {
     90   // If we're just printing the tags, do that, otherwise do some simple
     91   // colorization.
     92   if (DumpTags) {
     93     outs() << Tag.getName();
     94     if (Tag.getModifiers().size())
     95       outs() << " " << Tag.getModifiers();
     96     outs() << "\n";
     97     return;
     98   }
     99 
    100   if (!outs().has_colors())
    101     return;
    102   // Color registers as red and immediates as cyan. Those don't have nested
    103   // tags, so don't bother keeping a stack of colors to reset to.
    104   if (Tag.getName() == "reg")
    105     outs().changeColor(raw_ostream::RED);
    106   else if (Tag.getName() == "imm")
    107     outs().changeColor(raw_ostream::CYAN);
    108 }
    109 
    110 // Example handler for when the end of a tag is recognized.
    111 static void processEndTag(MarkupTag &Tag) {
    112   // If we're printing the tags, there's nothing more to do here. Otherwise,
    113   // set the color back the normal.
    114   if (DumpTags)
    115     return;
    116   if (!outs().has_colors())
    117     return;
    118   // Just reset to basic white.
    119   outs().changeColor(raw_ostream::WHITE, false);
    120 }
    121 
    122 MarkupTag MarkupParser::parseTag() {
    123   // First off, extract the tag into it's own StringRef so we can look at it
    124   // outside of the context of consuming input.
    125   StringRef::const_iterator Start = Lex.getPosition();
    126   SMLoc Loc = SMLoc::getFromPointer(Start - 1);
    127   while(Lex.getNextChar() != ':') {
    128     // EOF is an error.
    129     if (Lex.isEOF())
    130       FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
    131   }
    132   StringRef RawTag(Start, Lex.getPosition() - Start - 1);
    133   std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
    134   return MarkupTag(SplitTag.first, SplitTag.second, Loc);
    135 }
    136 
    137 static void parseMCMarkup(StringRef Filename) {
    138   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
    139       MemoryBuffer::getFileOrSTDIN(Filename);
    140   if (std::error_code EC = BufferPtr.getError()) {
    141     errs() << ToolName << ": " << EC.message() << '\n';
    142     return;
    143   }
    144   std::unique_ptr<MemoryBuffer> &Buffer = BufferPtr.get();
    145 
    146   SourceMgr SrcMgr;
    147 
    148   StringRef InputSource = Buffer->getBuffer();
    149 
    150   // Tell SrcMgr about this buffer, which is what the parser will pick up.
    151   SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
    152 
    153   MarkupLexer Lex(InputSource);
    154   MarkupParser Parser(Lex, SrcMgr);
    155 
    156   SmallVector<MarkupTag, 4> TagStack;
    157 
    158   for (int CurChar = Lex.getNextChar();
    159        CurChar != EOF;
    160        CurChar = Lex.getNextChar()) {
    161     switch (CurChar) {
    162     case '<': {
    163       // A "<<" is output as a literal '<' and does not start a markup tag.
    164       if (Lex.peekNextChar() == '<') {
    165         (void)Lex.getNextChar();
    166         break;
    167       }
    168       // Parse the markup entry.
    169       TagStack.push_back(Parser.parseTag());
    170 
    171       // Do any special handling for the start of a tag.
    172       processStartTag(TagStack.back());
    173       continue;
    174     }
    175     case '>': {
    176       SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
    177       // A ">>" is output as a literal '>' and does not end a markup tag.
    178       if (Lex.peekNextChar() == '>') {
    179         (void)Lex.getNextChar();
    180         break;
    181       }
    182       // Close out the innermost tag.
    183       if (TagStack.empty())
    184         Parser.FatalError(Loc, "'>' without matching '<'");
    185 
    186       // Do any special handling for the end of a tag.
    187       processEndTag(TagStack.back());
    188 
    189       TagStack.pop_back();
    190       continue;
    191     }
    192     default:
    193       break;
    194     }
    195     // For anything else, just echo the character back out.
    196     if (!DumpTags && CurChar != EOF)
    197       outs() << (char)CurChar;
    198   }
    199 
    200   // If there are any unterminated markup tags, issue diagnostics for them.
    201   while (!TagStack.empty()) {
    202     MarkupTag &Tag = TagStack.back();
    203     SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
    204                         "unterminated markup tag");
    205     TagStack.pop_back();
    206   }
    207 }
    208 
    209 int main(int argc, char **argv) {
    210   // Print a stack trace if we signal out.
    211   sys::PrintStackTraceOnErrorSignal();
    212   PrettyStackTraceProgram X(argc, argv);
    213 
    214   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
    215   cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
    216 
    217   ToolName = argv[0];
    218 
    219   // If no input files specified, read from stdin.
    220   if (InputFilenames.size() == 0)
    221     InputFilenames.push_back("-");
    222 
    223   std::for_each(InputFilenames.begin(), InputFilenames.end(),
    224                 parseMCMarkup);
    225   return 0;
    226 }
    227