Home | History | Annotate | Download | only in FileCheck
      1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // FileCheck does a line-by line check of a file that validates whether it
     11 // contains the expected content.  This is useful for regression tests etc.
     12 //
     13 // This program exits with an error status of 2 on error, exit status of 0 if
     14 // the file matched the expected contents, and exit status of 1 if it did not
     15 // contain the expected contents.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/ADT/OwningPtr.h"
     20 #include "llvm/ADT/SmallString.h"
     21 #include "llvm/ADT/StringExtras.h"
     22 #include "llvm/ADT/StringMap.h"
     23 #include "llvm/Support/CommandLine.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/PrettyStackTrace.h"
     26 #include "llvm/Support/Regex.h"
     27 #include "llvm/Support/Signals.h"
     28 #include "llvm/Support/SourceMgr.h"
     29 #include "llvm/Support/raw_ostream.h"
     30 #include "llvm/Support/system_error.h"
     31 #include <algorithm>
     32 #include <map>
     33 #include <string>
     34 #include <vector>
     35 using namespace llvm;
     36 
     37 static cl::opt<std::string>
     38 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
     39 
     40 static cl::opt<std::string>
     41 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
     42               cl::init("-"), cl::value_desc("filename"));
     43 
     44 static cl::opt<std::string>
     45 CheckPrefix("check-prefix", cl::init("CHECK"),
     46             cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
     47 
     48 static cl::opt<bool>
     49 NoCanonicalizeWhiteSpace("strict-whitespace",
     50               cl::desc("Do not treat all horizontal whitespace as equivalent"));
     51 
     52 //===----------------------------------------------------------------------===//
     53 // Pattern Handling Code.
     54 //===----------------------------------------------------------------------===//
     55 
     56 class Pattern {
     57   SMLoc PatternLoc;
     58 
     59   /// MatchEOF - When set, this pattern only matches the end of file. This is
     60   /// used for trailing CHECK-NOTs.
     61   bool MatchEOF;
     62 
     63   /// MatchNot
     64   bool MatchNot;
     65 
     66   /// MatchDag
     67   bool MatchDag;
     68 
     69   /// FixedStr - If non-empty, this pattern is a fixed string match with the
     70   /// specified fixed string.
     71   StringRef FixedStr;
     72 
     73   /// RegEx - If non-empty, this is a regex pattern.
     74   std::string RegExStr;
     75 
     76   /// \brief Contains the number of line this pattern is in.
     77   unsigned LineNumber;
     78 
     79   /// VariableUses - Entries in this vector map to uses of a variable in the
     80   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
     81   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
     82   /// value of bar at offset 3.
     83   std::vector<std::pair<StringRef, unsigned> > VariableUses;
     84 
     85   /// VariableDefs - Maps definitions of variables to their parenthesized
     86   /// capture numbers.
     87   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
     88   std::map<StringRef, unsigned> VariableDefs;
     89 
     90 public:
     91 
     92   Pattern(bool matchEOF = false)
     93     : MatchEOF(matchEOF), MatchNot(false), MatchDag(false) { }
     94 
     95   /// getLoc - Return the location in source code.
     96   SMLoc getLoc() const { return PatternLoc; }
     97 
     98   /// ParsePattern - Parse the given string into the Pattern.  SM provides the
     99   /// SourceMgr used for error reports, and LineNumber is the line number in
    100   /// the input file from which the pattern string was read.
    101   /// Returns true in case of an error, false otherwise.
    102   bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);
    103 
    104   /// Match - Match the pattern string against the input buffer Buffer.  This
    105   /// returns the position that is matched or npos if there is no match.  If
    106   /// there is a match, the size of the matched string is returned in MatchLen.
    107   ///
    108   /// The VariableTable StringMap provides the current values of filecheck
    109   /// variables and is updated if this match defines new values.
    110   size_t Match(StringRef Buffer, size_t &MatchLen,
    111                StringMap<StringRef> &VariableTable) const;
    112 
    113   /// PrintFailureInfo - Print additional information about a failure to match
    114   /// involving this pattern.
    115   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    116                         const StringMap<StringRef> &VariableTable) const;
    117 
    118   bool hasVariable() const { return !(VariableUses.empty() &&
    119                                       VariableDefs.empty()); }
    120 
    121   void setMatchNot(bool Not) { MatchNot = Not; }
    122   bool getMatchNot() const { return MatchNot; }
    123 
    124   void setMatchDag(bool Dag) { MatchDag = Dag; }
    125   bool getMatchDag() const { return MatchDag; }
    126 
    127 private:
    128   static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
    129   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
    130   void AddBackrefToRegEx(unsigned BackrefNum);
    131 
    132   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
    133   /// matching this pattern at the start of \arg Buffer; a distance of zero
    134   /// should correspond to a perfect match.
    135   unsigned ComputeMatchDistance(StringRef Buffer,
    136                                const StringMap<StringRef> &VariableTable) const;
    137 
    138   /// \brief Evaluates expression and stores the result to \p Value.
    139   /// \return true on success. false when the expression has invalid syntax.
    140   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
    141 
    142   /// \brief Finds the closing sequence of a regex variable usage or
    143   /// definition. Str has to point in the beginning of the definition
    144   /// (right after the opening sequence).
    145   /// \return offset of the closing sequence within Str, or npos if it was not
    146   /// found.
    147   size_t FindRegexVarEnd(StringRef Str);
    148 };
    149 
    150 
    151 bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
    152                            unsigned LineNumber) {
    153   this->LineNumber = LineNumber;
    154   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
    155 
    156   // Ignore trailing whitespace.
    157   while (!PatternStr.empty() &&
    158          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    159     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
    160 
    161   // Check that there is something on the line.
    162   if (PatternStr.empty()) {
    163     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
    164                     "found empty check string with prefix '" +
    165                     CheckPrefix+":'");
    166     return true;
    167   }
    168 
    169   // Check to see if this is a fixed string, or if it has regex pieces.
    170   if (PatternStr.size() < 2 ||
    171       (PatternStr.find("{{") == StringRef::npos &&
    172        PatternStr.find("[[") == StringRef::npos)) {
    173     FixedStr = PatternStr;
    174     return false;
    175   }
    176 
    177   // Paren value #0 is for the fully matched string.  Any new parenthesized
    178   // values add from there.
    179   unsigned CurParen = 1;
    180 
    181   // Otherwise, there is at least one regex piece.  Build up the regex pattern
    182   // by escaping scary characters in fixed strings, building up one big regex.
    183   while (!PatternStr.empty()) {
    184     // RegEx matches.
    185     if (PatternStr.startswith("{{")) {
    186       // This is the start of a regex match.  Scan for the }}.
    187       size_t End = PatternStr.find("}}");
    188       if (End == StringRef::npos) {
    189         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    190                         SourceMgr::DK_Error,
    191                         "found start of regex string with no end '}}'");
    192         return true;
    193       }
    194 
    195       // Enclose {{}} patterns in parens just like [[]] even though we're not
    196       // capturing the result for any purpose.  This is required in case the
    197       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
    198       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
    199       RegExStr += '(';
    200       ++CurParen;
    201 
    202       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
    203         return true;
    204       RegExStr += ')';
    205 
    206       PatternStr = PatternStr.substr(End+2);
    207       continue;
    208     }
    209 
    210     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
    211     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
    212     // second form is [[foo]] which is a reference to foo.  The variable name
    213     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
    214     // it.  This is to catch some common errors.
    215     if (PatternStr.startswith("[[")) {
    216       // Find the closing bracket pair ending the match.  End is going to be an
    217       // offset relative to the beginning of the match string.
    218       size_t End = FindRegexVarEnd(PatternStr.substr(2));
    219 
    220       if (End == StringRef::npos) {
    221         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    222                         SourceMgr::DK_Error,
    223                         "invalid named regex reference, no ]] found");
    224         return true;
    225       }
    226 
    227       StringRef MatchStr = PatternStr.substr(2, End);
    228       PatternStr = PatternStr.substr(End+4);
    229 
    230       // Get the regex name (e.g. "foo").
    231       size_t NameEnd = MatchStr.find(':');
    232       StringRef Name = MatchStr.substr(0, NameEnd);
    233 
    234       if (Name.empty()) {
    235         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    236                         "invalid name in named regex: empty name");
    237         return true;
    238       }
    239 
    240       // Verify that the name/expression is well formed. FileCheck currently
    241       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
    242       // is relaxed, more strict check is performed in \c EvaluateExpression.
    243       bool IsExpression = false;
    244       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    245         if (i == 0 && Name[i] == '@') {
    246           if (NameEnd != StringRef::npos) {
    247             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    248                             SourceMgr::DK_Error,
    249                             "invalid name in named regex definition");
    250             return true;
    251           }
    252           IsExpression = true;
    253           continue;
    254         }
    255         if (Name[i] != '_' && !isalnum(Name[i]) &&
    256             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
    257           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
    258                           SourceMgr::DK_Error, "invalid name in named regex");
    259           return true;
    260         }
    261       }
    262 
    263       // Name can't start with a digit.
    264       if (isdigit(static_cast<unsigned char>(Name[0]))) {
    265         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    266                         "invalid name in named regex");
    267         return true;
    268       }
    269 
    270       // Handle [[foo]].
    271       if (NameEnd == StringRef::npos) {
    272         // Handle variables that were defined earlier on the same line by
    273         // emitting a backreference.
    274         if (VariableDefs.find(Name) != VariableDefs.end()) {
    275           unsigned VarParenNum = VariableDefs[Name];
    276           if (VarParenNum < 1 || VarParenNum > 9) {
    277             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    278                             SourceMgr::DK_Error,
    279                             "Can't back-reference more than 9 variables");
    280             return true;
    281           }
    282           AddBackrefToRegEx(VarParenNum);
    283         } else {
    284           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
    285         }
    286         continue;
    287       }
    288 
    289       // Handle [[foo:.*]].
    290       VariableDefs[Name] = CurParen;
    291       RegExStr += '(';
    292       ++CurParen;
    293 
    294       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
    295         return true;
    296 
    297       RegExStr += ')';
    298     }
    299 
    300     // Handle fixed string matches.
    301     // Find the end, which is the start of the next regex.
    302     size_t FixedMatchEnd = PatternStr.find("{{");
    303     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    304     AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
    305     PatternStr = PatternStr.substr(FixedMatchEnd);
    306   }
    307 
    308   return false;
    309 }
    310 
    311 void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
    312   // Add the characters from FixedStr to the regex, escaping as needed.  This
    313   // avoids "leaning toothpicks" in common patterns.
    314   for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) {
    315     switch (FixedStr[i]) {
    316     // These are the special characters matched in "p_ere_exp".
    317     case '(':
    318     case ')':
    319     case '^':
    320     case '$':
    321     case '|':
    322     case '*':
    323     case '+':
    324     case '?':
    325     case '.':
    326     case '[':
    327     case '\\':
    328     case '{':
    329       TheStr += '\\';
    330       // FALL THROUGH.
    331     default:
    332       TheStr += FixedStr[i];
    333       break;
    334     }
    335   }
    336 }
    337 
    338 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
    339                               SourceMgr &SM) {
    340   Regex R(RS);
    341   std::string Error;
    342   if (!R.isValid(Error)) {
    343     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
    344                     "invalid regex: " + Error);
    345     return true;
    346   }
    347 
    348   RegExStr += RS.str();
    349   CurParen += R.getNumMatches();
    350   return false;
    351 }
    352 
    353 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
    354   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
    355   std::string Backref = std::string("\\") +
    356                         std::string(1, '0' + BackrefNum);
    357   RegExStr += Backref;
    358 }
    359 
    360 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
    361   // The only supported expression is @LINE([\+-]\d+)?
    362   if (!Expr.startswith("@LINE"))
    363     return false;
    364   Expr = Expr.substr(StringRef("@LINE").size());
    365   int Offset = 0;
    366   if (!Expr.empty()) {
    367     if (Expr[0] == '+')
    368       Expr = Expr.substr(1);
    369     else if (Expr[0] != '-')
    370       return false;
    371     if (Expr.getAsInteger(10, Offset))
    372       return false;
    373   }
    374   Value = llvm::itostr(LineNumber + Offset);
    375   return true;
    376 }
    377 
    378 /// Match - Match the pattern string against the input buffer Buffer.  This
    379 /// returns the position that is matched or npos if there is no match.  If
    380 /// there is a match, the size of the matched string is returned in MatchLen.
    381 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
    382                       StringMap<StringRef> &VariableTable) const {
    383   // If this is the EOF pattern, match it immediately.
    384   if (MatchEOF) {
    385     MatchLen = 0;
    386     return Buffer.size();
    387   }
    388 
    389   // If this is a fixed string pattern, just match it now.
    390   if (!FixedStr.empty()) {
    391     MatchLen = FixedStr.size();
    392     return Buffer.find(FixedStr);
    393   }
    394 
    395   // Regex match.
    396 
    397   // If there are variable uses, we need to create a temporary string with the
    398   // actual value.
    399   StringRef RegExToMatch = RegExStr;
    400   std::string TmpStr;
    401   if (!VariableUses.empty()) {
    402     TmpStr = RegExStr;
    403 
    404     unsigned InsertOffset = 0;
    405     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    406       std::string Value;
    407 
    408       if (VariableUses[i].first[0] == '@') {
    409         if (!EvaluateExpression(VariableUses[i].first, Value))
    410           return StringRef::npos;
    411       } else {
    412         StringMap<StringRef>::iterator it =
    413           VariableTable.find(VariableUses[i].first);
    414         // If the variable is undefined, return an error.
    415         if (it == VariableTable.end())
    416           return StringRef::npos;
    417 
    418         // Look up the value and escape it so that we can plop it into the regex.
    419         AddFixedStringToRegEx(it->second, Value);
    420       }
    421 
    422       // Plop it into the regex at the adjusted offset.
    423       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
    424                     Value.begin(), Value.end());
    425       InsertOffset += Value.size();
    426     }
    427 
    428     // Match the newly constructed regex.
    429     RegExToMatch = TmpStr;
    430   }
    431 
    432 
    433   SmallVector<StringRef, 4> MatchInfo;
    434   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
    435     return StringRef::npos;
    436 
    437   // Successful regex match.
    438   assert(!MatchInfo.empty() && "Didn't get any match");
    439   StringRef FullMatch = MatchInfo[0];
    440 
    441   // If this defines any variables, remember their values.
    442   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
    443                                                      E = VariableDefs.end();
    444        I != E; ++I) {
    445     assert(I->second < MatchInfo.size() && "Internal paren error");
    446     VariableTable[I->first] = MatchInfo[I->second];
    447   }
    448 
    449   MatchLen = FullMatch.size();
    450   return FullMatch.data()-Buffer.data();
    451 }
    452 
    453 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
    454                               const StringMap<StringRef> &VariableTable) const {
    455   // Just compute the number of matching characters. For regular expressions, we
    456   // just compare against the regex itself and hope for the best.
    457   //
    458   // FIXME: One easy improvement here is have the regex lib generate a single
    459   // example regular expression which matches, and use that as the example
    460   // string.
    461   StringRef ExampleString(FixedStr);
    462   if (ExampleString.empty())
    463     ExampleString = RegExStr;
    464 
    465   // Only compare up to the first line in the buffer, or the string size.
    466   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
    467   BufferPrefix = BufferPrefix.split('\n').first;
    468   return BufferPrefix.edit_distance(ExampleString);
    469 }
    470 
    471 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    472                                const StringMap<StringRef> &VariableTable) const{
    473   // If this was a regular expression using variables, print the current
    474   // variable values.
    475   if (!VariableUses.empty()) {
    476     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    477       SmallString<256> Msg;
    478       raw_svector_ostream OS(Msg);
    479       StringRef Var = VariableUses[i].first;
    480       if (Var[0] == '@') {
    481         std::string Value;
    482         if (EvaluateExpression(Var, Value)) {
    483           OS << "with expression \"";
    484           OS.write_escaped(Var) << "\" equal to \"";
    485           OS.write_escaped(Value) << "\"";
    486         } else {
    487           OS << "uses incorrect expression \"";
    488           OS.write_escaped(Var) << "\"";
    489         }
    490       } else {
    491         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
    492 
    493         // Check for undefined variable references.
    494         if (it == VariableTable.end()) {
    495           OS << "uses undefined variable \"";
    496           OS.write_escaped(Var) << "\"";
    497         } else {
    498           OS << "with variable \"";
    499           OS.write_escaped(Var) << "\" equal to \"";
    500           OS.write_escaped(it->second) << "\"";
    501         }
    502       }
    503 
    504       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    505                       OS.str());
    506     }
    507   }
    508 
    509   // Attempt to find the closest/best fuzzy match.  Usually an error happens
    510   // because some string in the output didn't exactly match. In these cases, we
    511   // would like to show the user a best guess at what "should have" matched, to
    512   // save them having to actually check the input manually.
    513   size_t NumLinesForward = 0;
    514   size_t Best = StringRef::npos;
    515   double BestQuality = 0;
    516 
    517   // Use an arbitrary 4k limit on how far we will search.
    518   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
    519     if (Buffer[i] == '\n')
    520       ++NumLinesForward;
    521 
    522     // Patterns have leading whitespace stripped, so skip whitespace when
    523     // looking for something which looks like a pattern.
    524     if (Buffer[i] == ' ' || Buffer[i] == '\t')
    525       continue;
    526 
    527     // Compute the "quality" of this match as an arbitrary combination of the
    528     // match distance and the number of lines skipped to get to this match.
    529     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
    530     double Quality = Distance + (NumLinesForward / 100.);
    531 
    532     if (Quality < BestQuality || Best == StringRef::npos) {
    533       Best = i;
    534       BestQuality = Quality;
    535     }
    536   }
    537 
    538   // Print the "possible intended match here" line if we found something
    539   // reasonable and not equal to what we showed in the "scanning from here"
    540   // line.
    541   if (Best && Best != StringRef::npos && BestQuality < 50) {
    542       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
    543                       SourceMgr::DK_Note, "possible intended match here");
    544 
    545     // FIXME: If we wanted to be really friendly we would show why the match
    546     // failed, as it can be hard to spot simple one character differences.
    547   }
    548 }
    549 
    550 size_t Pattern::FindRegexVarEnd(StringRef Str) {
    551   // Offset keeps track of the current offset within the input Str
    552   size_t Offset = 0;
    553   // [...] Nesting depth
    554   size_t BracketDepth = 0;
    555 
    556   while (!Str.empty()) {
    557     if (Str.startswith("]]") && BracketDepth == 0)
    558       return Offset;
    559     if (Str[0] == '\\') {
    560       // Backslash escapes the next char within regexes, so skip them both.
    561       Str = Str.substr(2);
    562       Offset += 2;
    563     } else {
    564       switch (Str[0]) {
    565         default:
    566           break;
    567         case '[':
    568           BracketDepth++;
    569           break;
    570         case ']':
    571           assert(BracketDepth > 0 && "Invalid regex");
    572           BracketDepth--;
    573           break;
    574       }
    575       Str = Str.substr(1);
    576       Offset++;
    577     }
    578   }
    579 
    580   return StringRef::npos;
    581 }
    582 
    583 
    584 //===----------------------------------------------------------------------===//
    585 // Check Strings.
    586 //===----------------------------------------------------------------------===//
    587 
    588 /// CheckString - This is a check that we found in the input file.
    589 struct CheckString {
    590   /// Pat - The pattern to match.
    591   Pattern Pat;
    592 
    593   /// Loc - The location in the match file that the check string was specified.
    594   SMLoc Loc;
    595 
    596   /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
    597   /// to a CHECK: directive.
    598   bool IsCheckNext;
    599 
    600   /// IsCheckLabel - This is true if this is a CHECK-LABEL: directive (as
    601   /// opposed to a CHECK: directive.
    602   bool IsCheckLabel;
    603 
    604   /// DagNotStrings - These are all of the strings that are disallowed from
    605   /// occurring between this match string and the previous one (or start of
    606   /// file).
    607   std::vector<Pattern> DagNotStrings;
    608 
    609   CheckString(const Pattern &P, SMLoc L, bool isCheckNext, bool isCheckLabel)
    610     : Pat(P), Loc(L), IsCheckNext(isCheckNext), IsCheckLabel(isCheckLabel) {}
    611 
    612   /// Check - Match check string and its "not strings" and/or "dag strings".
    613   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabel,
    614                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
    615 
    616   /// CheckNext - Verify there is a single line in the given buffer.
    617   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
    618 
    619   /// CheckNot - Verify there's no "not strings" in the given buffer.
    620   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
    621                 const std::vector<const Pattern *> &NotStrings,
    622                 StringMap<StringRef> &VariableTable) const;
    623 
    624   /// CheckDag - Match "dag strings" and their mixed "not strings".
    625   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
    626                   std::vector<const Pattern *> &NotStrings,
    627                   StringMap<StringRef> &VariableTable) const;
    628 };
    629 
    630 /// Canonicalize whitespaces in the input file. Line endings are replaced
    631 /// with UNIX-style '\n'.
    632 ///
    633 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
    634 /// characters to a single space.
    635 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
    636                                            bool PreserveHorizontal) {
    637   SmallString<128> NewFile;
    638   NewFile.reserve(MB->getBufferSize());
    639 
    640   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
    641        Ptr != End; ++Ptr) {
    642     // Eliminate trailing dosish \r.
    643     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
    644       continue;
    645     }
    646 
    647     // If current char is not a horizontal whitespace or if horizontal
    648     // whitespace canonicalization is disabled, dump it to output as is.
    649     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
    650       NewFile.push_back(*Ptr);
    651       continue;
    652     }
    653 
    654     // Otherwise, add one space and advance over neighboring space.
    655     NewFile.push_back(' ');
    656     while (Ptr+1 != End &&
    657            (Ptr[1] == ' ' || Ptr[1] == '\t'))
    658       ++Ptr;
    659   }
    660 
    661   // Free the old buffer and return a new one.
    662   MemoryBuffer *MB2 =
    663     MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
    664 
    665   delete MB;
    666   return MB2;
    667 }
    668 
    669 
    670 /// ReadCheckFile - Read the check file, which specifies the sequence of
    671 /// expected strings.  The strings are added to the CheckStrings vector.
    672 /// Returns true in case of an error, false otherwise.
    673 static bool ReadCheckFile(SourceMgr &SM,
    674                           std::vector<CheckString> &CheckStrings) {
    675   OwningPtr<MemoryBuffer> File;
    676   if (error_code ec =
    677         MemoryBuffer::getFileOrSTDIN(CheckFilename, File)) {
    678     errs() << "Could not open check file '" << CheckFilename << "': "
    679            << ec.message() << '\n';
    680     return true;
    681   }
    682 
    683   // If we want to canonicalize whitespace, strip excess whitespace from the
    684   // buffer containing the CHECK lines. Remove DOS style line endings.
    685   MemoryBuffer *F =
    686     CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
    687 
    688   SM.AddNewSourceBuffer(F, SMLoc());
    689 
    690   // Find all instances of CheckPrefix followed by : in the file.
    691   StringRef Buffer = F->getBuffer();
    692   std::vector<Pattern> DagNotMatches;
    693 
    694   // LineNumber keeps track of the line on which CheckPrefix instances are
    695   // found.
    696   unsigned LineNumber = 1;
    697 
    698   while (1) {
    699     // See if Prefix occurs in the memory buffer.
    700     size_t PrefixLoc = Buffer.find(CheckPrefix);
    701     // If we didn't find a match, we're done.
    702     if (PrefixLoc == StringRef::npos)
    703       break;
    704 
    705     LineNumber += Buffer.substr(0, PrefixLoc).count('\n');
    706 
    707     Buffer = Buffer.substr(PrefixLoc);
    708 
    709     const char *CheckPrefixStart = Buffer.data();
    710 
    711     // When we find a check prefix, keep track of whether we find CHECK: or
    712     // CHECK-NEXT:
    713     bool IsCheckNext = false, IsCheckNot = false, IsCheckDag = false,
    714          IsCheckLabel = false;
    715 
    716     // Verify that the : is present after the prefix.
    717     if (Buffer[CheckPrefix.size()] == ':') {
    718       Buffer = Buffer.substr(CheckPrefix.size()+1);
    719     } else if (Buffer.size() > CheckPrefix.size()+6 &&
    720                memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
    721       Buffer = Buffer.substr(CheckPrefix.size()+6);
    722       IsCheckNext = true;
    723     } else if (Buffer.size() > CheckPrefix.size()+5 &&
    724                memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
    725       Buffer = Buffer.substr(CheckPrefix.size()+5);
    726       IsCheckNot = true;
    727     } else if (Buffer.size() > CheckPrefix.size()+5 &&
    728                memcmp(Buffer.data()+CheckPrefix.size(), "-DAG:", 5) == 0) {
    729       Buffer = Buffer.substr(CheckPrefix.size()+5);
    730       IsCheckDag = true;
    731     } else if (Buffer.size() > CheckPrefix.size()+7 &&
    732                memcmp(Buffer.data()+CheckPrefix.size(), "-LABEL:", 7) == 0) {
    733       Buffer = Buffer.substr(CheckPrefix.size()+7);
    734       IsCheckLabel = true;
    735     } else {
    736       Buffer = Buffer.substr(1);
    737       continue;
    738     }
    739 
    740     // Okay, we found the prefix, yay.  Remember the rest of the line, but
    741     // ignore leading and trailing whitespace.
    742     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
    743 
    744     // Scan ahead to the end of line.
    745     size_t EOL = Buffer.find_first_of("\n\r");
    746 
    747     // Remember the location of the start of the pattern, for diagnostics.
    748     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
    749 
    750     // Parse the pattern.
    751     Pattern P;
    752     if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber))
    753       return true;
    754 
    755     // Verify that CHECK-LABEL lines do not define or use variables
    756     if (IsCheckLabel && P.hasVariable()) {
    757       SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
    758                       SourceMgr::DK_Error,
    759                       "found '"+CheckPrefix+"-LABEL:' with variable definition"
    760                       " or use'");
    761       return true;
    762     }
    763 
    764     P.setMatchNot(IsCheckNot);
    765     P.setMatchDag(IsCheckDag);
    766 
    767     Buffer = Buffer.substr(EOL);
    768 
    769     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
    770     if (IsCheckNext && CheckStrings.empty()) {
    771       SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
    772                       SourceMgr::DK_Error,
    773                       "found '"+CheckPrefix+"-NEXT:' without previous '"+
    774                       CheckPrefix+ ": line");
    775       return true;
    776     }
    777 
    778     // Handle CHECK-DAG/-NOT.
    779     if (IsCheckDag || IsCheckNot) {
    780       DagNotMatches.push_back(P);
    781       continue;
    782     }
    783 
    784     // Okay, add the string we captured to the output vector and move on.
    785     CheckStrings.push_back(CheckString(P,
    786                                        PatternLoc,
    787                                        IsCheckNext,
    788                                        IsCheckLabel));
    789     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    790   }
    791 
    792   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs.
    793   if (!DagNotMatches.empty()) {
    794     CheckStrings.push_back(CheckString(Pattern(true),
    795                                        SMLoc::getFromPointer(Buffer.data()),
    796                                        false,
    797                                        false));
    798     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    799   }
    800 
    801   if (CheckStrings.empty()) {
    802     errs() << "error: no check strings found with prefix '" << CheckPrefix
    803            << ":'\n";
    804     return true;
    805   }
    806 
    807   return false;
    808 }
    809 
    810 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
    811                              const Pattern &Pat, StringRef Buffer,
    812                              StringMap<StringRef> &VariableTable) {
    813   // Otherwise, we have an error, emit an error message.
    814   SM.PrintMessage(Loc, SourceMgr::DK_Error,
    815                   "expected string not found in input");
    816 
    817   // Print the "scanning from here" line.  If the current position is at the
    818   // end of a line, advance to the start of the next line.
    819   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
    820 
    821   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    822                   "scanning from here");
    823 
    824   // Allow the pattern to print additional information if desired.
    825   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
    826 }
    827 
    828 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
    829                              StringRef Buffer,
    830                              StringMap<StringRef> &VariableTable) {
    831   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
    832 }
    833 
    834 /// CountNumNewlinesBetween - Count the number of newlines in the specified
    835 /// range.
    836 static unsigned CountNumNewlinesBetween(StringRef Range) {
    837   unsigned NumNewLines = 0;
    838   while (1) {
    839     // Scan for newline.
    840     Range = Range.substr(Range.find_first_of("\n\r"));
    841     if (Range.empty()) return NumNewLines;
    842 
    843     ++NumNewLines;
    844 
    845     // Handle \n\r and \r\n as a single newline.
    846     if (Range.size() > 1 &&
    847         (Range[1] == '\n' || Range[1] == '\r') &&
    848         (Range[0] != Range[1]))
    849       Range = Range.substr(1);
    850     Range = Range.substr(1);
    851   }
    852 }
    853 
    854 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
    855                           bool IsLabel, size_t &MatchLen,
    856                           StringMap<StringRef> &VariableTable) const {
    857   size_t LastPos = 0;
    858   std::vector<const Pattern *> NotStrings;
    859 
    860   if (!IsLabel) {
    861     // Match "dag strings" (with mixed "not strings" if any).
    862     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
    863     if (LastPos == StringRef::npos)
    864       return StringRef::npos;
    865   }
    866 
    867   // Match itself from the last position after matching CHECK-DAG.
    868   StringRef MatchBuffer = Buffer.substr(LastPos);
    869   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
    870   if (MatchPos == StringRef::npos) {
    871     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
    872     return StringRef::npos;
    873   }
    874   MatchPos += LastPos;
    875 
    876   if (!IsLabel) {
    877     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
    878 
    879     // If this check is a "CHECK-NEXT", verify that the previous match was on
    880     // the previous line (i.e. that there is one newline between them).
    881     if (CheckNext(SM, SkippedRegion))
    882       return StringRef::npos;
    883 
    884     // If this match had "not strings", verify that they don't exist in the
    885     // skipped region.
    886     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
    887       return StringRef::npos;
    888   }
    889 
    890   return MatchPos;
    891 }
    892 
    893 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
    894   if (!IsCheckNext)
    895     return false;
    896 
    897   // Count the number of newlines between the previous match and this one.
    898   assert(Buffer.data() !=
    899          SM.getMemoryBuffer(
    900            SM.FindBufferContainingLoc(
    901              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
    902          "CHECK-NEXT can't be the first check in a file");
    903 
    904   unsigned NumNewLines = CountNumNewlinesBetween(Buffer);
    905 
    906   if (NumNewLines == 0) {
    907     SM.PrintMessage(Loc, SourceMgr::DK_Error, CheckPrefix+
    908                     "-NEXT: is on the same line as previous match");
    909     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
    910                     SourceMgr::DK_Note, "'next' match was here");
    911     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    912                     "previous match ended here");
    913     return true;
    914   }
    915 
    916   if (NumNewLines != 1) {
    917     SM.PrintMessage(Loc, SourceMgr::DK_Error, CheckPrefix+
    918                     "-NEXT: is not on the line after the previous match");
    919     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
    920                     SourceMgr::DK_Note, "'next' match was here");
    921     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    922                     "previous match ended here");
    923     return true;
    924   }
    925 
    926   return false;
    927 }
    928 
    929 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
    930                            const std::vector<const Pattern *> &NotStrings,
    931                            StringMap<StringRef> &VariableTable) const {
    932   for (unsigned ChunkNo = 0, e = NotStrings.size();
    933        ChunkNo != e; ++ChunkNo) {
    934     const Pattern *Pat = NotStrings[ChunkNo];
    935     assert(Pat->getMatchNot() && "Expect CHECK-NOT!");
    936 
    937     size_t MatchLen = 0;
    938     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
    939 
    940     if (Pos == StringRef::npos) continue;
    941 
    942     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
    943                     SourceMgr::DK_Error,
    944                     CheckPrefix+"-NOT: string occurred!");
    945     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
    946                     CheckPrefix+"-NOT: pattern specified here");
    947     return true;
    948   }
    949 
    950   return false;
    951 }
    952 
    953 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
    954                              std::vector<const Pattern *> &NotStrings,
    955                              StringMap<StringRef> &VariableTable) const {
    956   if (DagNotStrings.empty())
    957     return 0;
    958 
    959   size_t LastPos = 0;
    960   size_t StartPos = LastPos;
    961 
    962   for (unsigned ChunkNo = 0, e = DagNotStrings.size();
    963        ChunkNo != e; ++ChunkNo) {
    964     const Pattern &Pat = DagNotStrings[ChunkNo];
    965 
    966     assert((Pat.getMatchDag() ^ Pat.getMatchNot()) &&
    967            "Invalid CHECK-DAG or CHECK-NOT!");
    968 
    969     if (Pat.getMatchNot()) {
    970       NotStrings.push_back(&Pat);
    971       continue;
    972     }
    973 
    974     assert(Pat.getMatchDag() && "Expect CHECK-DAG!");
    975 
    976     size_t MatchLen = 0, MatchPos;
    977 
    978     // CHECK-DAG always matches from the start.
    979     StringRef MatchBuffer = Buffer.substr(StartPos);
    980     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
    981     // With a group of CHECK-DAGs, a single mismatching means the match on
    982     // that group of CHECK-DAGs fails immediately.
    983     if (MatchPos == StringRef::npos) {
    984       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
    985       return StringRef::npos;
    986     }
    987     // Re-calc it as the offset relative to the start of the original string.
    988     MatchPos += StartPos;
    989 
    990     if (!NotStrings.empty()) {
    991       if (MatchPos < LastPos) {
    992         // Reordered?
    993         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
    994                         SourceMgr::DK_Error,
    995                         CheckPrefix+"-DAG: found a match of CHECK-DAG"
    996                         " reordering across a CHECK-NOT");
    997         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
    998                         SourceMgr::DK_Note,
    999                         CheckPrefix+"-DAG: the farthest match of CHECK-DAG"
   1000                         " is found here");
   1001         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
   1002                         CheckPrefix+"-NOT: the crossed pattern specified"
   1003                         " here");
   1004         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
   1005                         CheckPrefix+"-DAG: the reordered pattern specified"
   1006                         " here");
   1007         return StringRef::npos;
   1008       }
   1009       // All subsequent CHECK-DAGs should be matched from the farthest
   1010       // position of all precedent CHECK-DAGs (including this one.)
   1011       StartPos = LastPos;
   1012       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
   1013       // CHECK-DAG, verify that there's no 'not' strings occurred in that
   1014       // region.
   1015       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1016       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1017         return StringRef::npos;
   1018       // Clear "not strings".
   1019       NotStrings.clear();
   1020     }
   1021 
   1022     // Update the last position with CHECK-DAG matches.
   1023     LastPos = std::max(MatchPos + MatchLen, LastPos);
   1024   }
   1025 
   1026   return LastPos;
   1027 }
   1028 
   1029 int main(int argc, char **argv) {
   1030   sys::PrintStackTraceOnErrorSignal();
   1031   PrettyStackTraceProgram X(argc, argv);
   1032   cl::ParseCommandLineOptions(argc, argv);
   1033 
   1034   SourceMgr SM;
   1035 
   1036   // Read the expected strings from the check file.
   1037   std::vector<CheckString> CheckStrings;
   1038   if (ReadCheckFile(SM, CheckStrings))
   1039     return 2;
   1040 
   1041   // Open the file to check and add it to SourceMgr.
   1042   OwningPtr<MemoryBuffer> File;
   1043   if (error_code ec =
   1044         MemoryBuffer::getFileOrSTDIN(InputFilename, File)) {
   1045     errs() << "Could not open input file '" << InputFilename << "': "
   1046            << ec.message() << '\n';
   1047     return 2;
   1048   }
   1049 
   1050   if (File->getBufferSize() == 0) {
   1051     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
   1052     return 2;
   1053   }
   1054 
   1055   // Remove duplicate spaces in the input file if requested.
   1056   // Remove DOS style line endings.
   1057   MemoryBuffer *F =
   1058     CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
   1059 
   1060   SM.AddNewSourceBuffer(F, SMLoc());
   1061 
   1062   /// VariableTable - This holds all the current filecheck variables.
   1063   StringMap<StringRef> VariableTable;
   1064 
   1065   // Check that we have all of the expected strings, in order, in the input
   1066   // file.
   1067   StringRef Buffer = F->getBuffer();
   1068 
   1069   bool hasError = false;
   1070 
   1071   unsigned i = 0, j = 0, e = CheckStrings.size();
   1072 
   1073   while (true) {
   1074     StringRef CheckRegion;
   1075     if (j == e) {
   1076       CheckRegion = Buffer;
   1077     } else {
   1078       const CheckString &CheckLabelStr = CheckStrings[j];
   1079       if (!CheckLabelStr.IsCheckLabel) {
   1080         ++j;
   1081         continue;
   1082       }
   1083 
   1084       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
   1085       size_t MatchLabelLen = 0;
   1086       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
   1087                                                  MatchLabelLen, VariableTable);
   1088       if (MatchLabelPos == StringRef::npos) {
   1089         hasError = true;
   1090         break;
   1091       }
   1092 
   1093       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
   1094       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
   1095       ++j;
   1096     }
   1097 
   1098     for ( ; i != j; ++i) {
   1099       const CheckString &CheckStr = CheckStrings[i];
   1100 
   1101       // Check each string within the scanned region, including a second check
   1102       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
   1103       size_t MatchLen = 0;
   1104       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
   1105                                        VariableTable);
   1106 
   1107       if (MatchPos == StringRef::npos) {
   1108         hasError = true;
   1109         i = j;
   1110         break;
   1111       }
   1112 
   1113       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
   1114     }
   1115 
   1116     if (j == e)
   1117       break;
   1118   }
   1119 
   1120   return hasError ? 1 : 0;
   1121 }
   1122