1 //===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Example simple parser implementation for the MC assembly markup language. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/CommandLine.h" 15 #include "llvm/Support/Format.h" 16 #include "llvm/Support/ManagedStatic.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 #include "llvm/Support/PrettyStackTrace.h" 19 #include "llvm/Support/Signals.h" 20 #include "llvm/Support/SourceMgr.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <system_error> 23 using namespace llvm; 24 25 static cl::list<std::string> 26 InputFilenames(cl::Positional, cl::desc("<input files>"), 27 cl::ZeroOrMore); 28 static cl::opt<bool> 29 DumpTags("dump-tags", cl::desc("List all tags encountered in input")); 30 31 static StringRef ToolName; 32 33 /// Trivial lexer for the markup parser. Input is always handled a character 34 /// at a time. The lexer just encapsulates EOF and lookahead handling. 35 class MarkupLexer { 36 StringRef::const_iterator Start; 37 StringRef::const_iterator CurPtr; 38 StringRef::const_iterator End; 39 public: 40 MarkupLexer(StringRef Source) 41 : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {} 42 // When processing non-markup, input is consumed a character at a time. 43 bool isEOF() { return CurPtr == End; } 44 int getNextChar() { 45 if (CurPtr == End) return EOF; 46 return *CurPtr++; 47 } 48 int peekNextChar() { 49 if (CurPtr == End) return EOF; 50 return *CurPtr; 51 } 52 StringRef::const_iterator getPosition() const { return CurPtr; } 53 }; 54 55 /// A markup tag is a name and a (usually empty) list of modifiers. 56 class MarkupTag { 57 StringRef Name; 58 StringRef Modifiers; 59 SMLoc StartLoc; 60 public: 61 MarkupTag(StringRef n, StringRef m, SMLoc Loc) 62 : Name(n), Modifiers(m), StartLoc(Loc) {} 63 StringRef getName() const { return Name; } 64 StringRef getModifiers() const { return Modifiers; } 65 SMLoc getLoc() const { return StartLoc; } 66 }; 67 68 /// A simple parser implementation for creating MarkupTags from input text. 69 class MarkupParser { 70 MarkupLexer &Lex; 71 SourceMgr &SM; 72 public: 73 MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {} 74 /// Create a MarkupTag from the current position in the MarkupLexer. 75 /// The parseTag() method should be called when the lexer has processed 76 /// the opening '<' character. Input will be consumed up to and including 77 /// the ':' which terminates the tag open. 78 MarkupTag parseTag(); 79 /// Issue a diagnostic and terminate program execution. 80 void FatalError(SMLoc Loc, StringRef Msg); 81 }; 82 83 void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) { 84 SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg); 85 exit(1); 86 } 87 88 // Example handler for when a tag is recognized. 89 static void processStartTag(MarkupTag &Tag) { 90 // If we're just printing the tags, do that, otherwise do some simple 91 // colorization. 92 if (DumpTags) { 93 outs() << Tag.getName(); 94 if (Tag.getModifiers().size()) 95 outs() << " " << Tag.getModifiers(); 96 outs() << "\n"; 97 return; 98 } 99 100 if (!outs().has_colors()) 101 return; 102 // Color registers as red and immediates as cyan. Those don't have nested 103 // tags, so don't bother keeping a stack of colors to reset to. 104 if (Tag.getName() == "reg") 105 outs().changeColor(raw_ostream::RED); 106 else if (Tag.getName() == "imm") 107 outs().changeColor(raw_ostream::CYAN); 108 } 109 110 // Example handler for when the end of a tag is recognized. 111 static void processEndTag(MarkupTag &Tag) { 112 // If we're printing the tags, there's nothing more to do here. Otherwise, 113 // set the color back the normal. 114 if (DumpTags) 115 return; 116 if (!outs().has_colors()) 117 return; 118 // Just reset to basic white. 119 outs().changeColor(raw_ostream::WHITE, false); 120 } 121 122 MarkupTag MarkupParser::parseTag() { 123 // First off, extract the tag into it's own StringRef so we can look at it 124 // outside of the context of consuming input. 125 StringRef::const_iterator Start = Lex.getPosition(); 126 SMLoc Loc = SMLoc::getFromPointer(Start - 1); 127 while(Lex.getNextChar() != ':') { 128 // EOF is an error. 129 if (Lex.isEOF()) 130 FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag"); 131 } 132 StringRef RawTag(Start, Lex.getPosition() - Start - 1); 133 std::pair<StringRef, StringRef> SplitTag = RawTag.split(' '); 134 return MarkupTag(SplitTag.first, SplitTag.second, Loc); 135 } 136 137 static void parseMCMarkup(StringRef Filename) { 138 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr = 139 MemoryBuffer::getFileOrSTDIN(Filename); 140 if (std::error_code EC = BufferPtr.getError()) { 141 errs() << ToolName << ": " << EC.message() << '\n'; 142 return; 143 } 144 std::unique_ptr<MemoryBuffer> &Buffer = BufferPtr.get(); 145 146 SourceMgr SrcMgr; 147 148 StringRef InputSource = Buffer->getBuffer(); 149 150 // Tell SrcMgr about this buffer, which is what the parser will pick up. 151 SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); 152 153 MarkupLexer Lex(InputSource); 154 MarkupParser Parser(Lex, SrcMgr); 155 156 SmallVector<MarkupTag, 4> TagStack; 157 158 for (int CurChar = Lex.getNextChar(); 159 CurChar != EOF; 160 CurChar = Lex.getNextChar()) { 161 switch (CurChar) { 162 case '<': { 163 // A "<<" is output as a literal '<' and does not start a markup tag. 164 if (Lex.peekNextChar() == '<') { 165 (void)Lex.getNextChar(); 166 break; 167 } 168 // Parse the markup entry. 169 TagStack.push_back(Parser.parseTag()); 170 171 // Do any special handling for the start of a tag. 172 processStartTag(TagStack.back()); 173 continue; 174 } 175 case '>': { 176 SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1); 177 // A ">>" is output as a literal '>' and does not end a markup tag. 178 if (Lex.peekNextChar() == '>') { 179 (void)Lex.getNextChar(); 180 break; 181 } 182 // Close out the innermost tag. 183 if (TagStack.empty()) 184 Parser.FatalError(Loc, "'>' without matching '<'"); 185 186 // Do any special handling for the end of a tag. 187 processEndTag(TagStack.back()); 188 189 TagStack.pop_back(); 190 continue; 191 } 192 default: 193 break; 194 } 195 // For anything else, just echo the character back out. 196 if (!DumpTags && CurChar != EOF) 197 outs() << (char)CurChar; 198 } 199 200 // If there are any unterminated markup tags, issue diagnostics for them. 201 while (!TagStack.empty()) { 202 MarkupTag &Tag = TagStack.back(); 203 SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error, 204 "unterminated markup tag"); 205 TagStack.pop_back(); 206 } 207 } 208 209 int main(int argc, char **argv) { 210 // Print a stack trace if we signal out. 211 sys::PrintStackTraceOnErrorSignal(); 212 PrettyStackTraceProgram X(argc, argv); 213 214 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 215 cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n"); 216 217 ToolName = argv[0]; 218 219 // If no input files specified, read from stdin. 220 if (InputFilenames.size() == 0) 221 InputFilenames.push_back("-"); 222 223 std::for_each(InputFilenames.begin(), InputFilenames.end(), 224 parseMCMarkup); 225 return 0; 226 } 227