1 //===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Example simple parser implementation for the MC assembly markup language. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/OwningPtr.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/Format.h" 17 #include "llvm/Support/ManagedStatic.h" 18 #include "llvm/Support/MemoryBuffer.h" 19 #include "llvm/Support/PrettyStackTrace.h" 20 #include "llvm/Support/Signals.h" 21 #include "llvm/Support/SourceMgr.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include "llvm/Support/system_error.h" 24 using namespace llvm; 25 26 static cl::list<std::string> 27 InputFilenames(cl::Positional, cl::desc("<input files>"), 28 cl::ZeroOrMore); 29 static cl::opt<bool> 30 DumpTags("dump-tags", cl::desc("List all tags encountered in input")); 31 32 static StringRef ToolName; 33 34 /// Trivial lexer for the markup parser. Input is always handled a character 35 /// at a time. The lexer just encapsulates EOF and lookahead handling. 36 class MarkupLexer { 37 StringRef::const_iterator Start; 38 StringRef::const_iterator CurPtr; 39 StringRef::const_iterator End; 40 public: 41 MarkupLexer(StringRef Source) 42 : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {} 43 // When processing non-markup, input is consumed a character at a time. 44 bool isEOF() { return CurPtr == End; } 45 int getNextChar() { 46 if (CurPtr == End) return EOF; 47 return *CurPtr++; 48 } 49 int peekNextChar() { 50 if (CurPtr == End) return EOF; 51 return *CurPtr; 52 } 53 StringRef::const_iterator getPosition() const { return CurPtr; } 54 }; 55 56 /// A markup tag is a name and a (usually empty) list of modifiers. 57 class MarkupTag { 58 StringRef Name; 59 StringRef Modifiers; 60 SMLoc StartLoc; 61 public: 62 MarkupTag(StringRef n, StringRef m, SMLoc Loc) 63 : Name(n), Modifiers(m), StartLoc(Loc) {} 64 StringRef getName() const { return Name; } 65 StringRef getModifiers() const { return Modifiers; } 66 SMLoc getLoc() const { return StartLoc; } 67 }; 68 69 /// A simple parser implementation for creating MarkupTags from input text. 70 class MarkupParser { 71 MarkupLexer &Lex; 72 SourceMgr &SM; 73 public: 74 MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {} 75 /// Create a MarkupTag from the current position in the MarkupLexer. 76 /// The parseTag() method should be called when the lexer has processed 77 /// the opening '<' character. Input will be consumed up to and including 78 /// the ':' which terminates the tag open. 79 MarkupTag parseTag(); 80 /// Issue a diagnostic and terminate program execution. 81 void FatalError(SMLoc Loc, StringRef Msg); 82 }; 83 84 void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) { 85 SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg); 86 exit(1); 87 } 88 89 // Example handler for when a tag is recognized. 90 static void processStartTag(MarkupTag &Tag) { 91 // If we're just printing the tags, do that, otherwise do some simple 92 // colorization. 93 if (DumpTags) { 94 outs() << Tag.getName(); 95 if (Tag.getModifiers().size()) 96 outs() << " " << Tag.getModifiers(); 97 outs() << "\n"; 98 return; 99 } 100 101 if (!outs().has_colors()) 102 return; 103 // Color registers as red and immediates as cyan. Those don't have nested 104 // tags, so don't bother keeping a stack of colors to reset to. 105 if (Tag.getName() == "reg") 106 outs().changeColor(raw_ostream::RED); 107 else if (Tag.getName() == "imm") 108 outs().changeColor(raw_ostream::CYAN); 109 } 110 111 // Example handler for when the end of a tag is recognized. 112 static void processEndTag(MarkupTag &Tag) { 113 // If we're printing the tags, there's nothing more to do here. Otherwise, 114 // set the color back the normal. 115 if (DumpTags) 116 return; 117 if (!outs().has_colors()) 118 return; 119 // Just reset to basic white. 120 outs().changeColor(raw_ostream::WHITE, false); 121 } 122 123 MarkupTag MarkupParser::parseTag() { 124 // First off, extract the tag into it's own StringRef so we can look at it 125 // outside of the context of consuming input. 126 StringRef::const_iterator Start = Lex.getPosition(); 127 SMLoc Loc = SMLoc::getFromPointer(Start - 1); 128 while(Lex.getNextChar() != ':') { 129 // EOF is an error. 130 if (Lex.isEOF()) 131 FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag"); 132 } 133 StringRef RawTag(Start, Lex.getPosition() - Start - 1); 134 std::pair<StringRef, StringRef> SplitTag = RawTag.split(' '); 135 return MarkupTag(SplitTag.first, SplitTag.second, Loc); 136 } 137 138 static void parseMCMarkup(StringRef Filename) { 139 OwningPtr<MemoryBuffer> BufferPtr; 140 if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) { 141 errs() << ToolName << ": " << ec.message() << '\n'; 142 return; 143 } 144 MemoryBuffer *Buffer = BufferPtr.take(); 145 146 SourceMgr SrcMgr; 147 148 // Tell SrcMgr about this buffer, which is what the parser will pick up. 149 SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); 150 151 StringRef InputSource = Buffer->getBuffer(); 152 MarkupLexer Lex(InputSource); 153 MarkupParser Parser(Lex, SrcMgr); 154 155 SmallVector<MarkupTag, 4> TagStack; 156 157 for (int CurChar = Lex.getNextChar(); 158 CurChar != EOF; 159 CurChar = Lex.getNextChar()) { 160 switch (CurChar) { 161 case '<': { 162 // A "<<" is output as a literal '<' and does not start a markup tag. 163 if (Lex.peekNextChar() == '<') { 164 (void)Lex.getNextChar(); 165 break; 166 } 167 // Parse the markup entry. 168 TagStack.push_back(Parser.parseTag()); 169 170 // Do any special handling for the start of a tag. 171 processStartTag(TagStack.back()); 172 continue; 173 } 174 case '>': { 175 SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1); 176 // A ">>" is output as a literal '>' and does not end a markup tag. 177 if (Lex.peekNextChar() == '>') { 178 (void)Lex.getNextChar(); 179 break; 180 } 181 // Close out the innermost tag. 182 if (TagStack.empty()) 183 Parser.FatalError(Loc, "'>' without matching '<'"); 184 185 // Do any special handling for the end of a tag. 186 processEndTag(TagStack.back()); 187 188 TagStack.pop_back(); 189 continue; 190 } 191 default: 192 break; 193 } 194 // For anything else, just echo the character back out. 195 if (!DumpTags && CurChar != EOF) 196 outs() << (char)CurChar; 197 } 198 199 // If there are any unterminated markup tags, issue diagnostics for them. 200 while (!TagStack.empty()) { 201 MarkupTag &Tag = TagStack.back(); 202 SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error, 203 "unterminated markup tag"); 204 TagStack.pop_back(); 205 } 206 } 207 208 int main(int argc, char **argv) { 209 // Print a stack trace if we signal out. 210 sys::PrintStackTraceOnErrorSignal(); 211 PrettyStackTraceProgram X(argc, argv); 212 213 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 214 cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n"); 215 216 ToolName = argv[0]; 217 218 // If no input files specified, read from stdin. 219 if (InputFilenames.size() == 0) 220 InputFilenames.push_back("-"); 221 222 std::for_each(InputFilenames.begin(), InputFilenames.end(), 223 parseMCMarkup); 224 return 0; 225 } 226