Home | History | Annotate | Download | only in FileCheck
      1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // FileCheck does a line-by line check of a file that validates whether it
     11 // contains the expected content.  This is useful for regression tests etc.
     12 //
     13 // This program exits with an error status of 2 on error, exit status of 0 if
     14 // the file matched the expected contents, and exit status of 1 if it did not
     15 // contain the expected contents.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/ADT/SmallString.h"
     20 #include "llvm/ADT/StringExtras.h"
     21 #include "llvm/ADT/StringMap.h"
     22 #include "llvm/ADT/StringSet.h"
     23 #include "llvm/Support/CommandLine.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/PrettyStackTrace.h"
     26 #include "llvm/Support/Regex.h"
     27 #include "llvm/Support/Signals.h"
     28 #include "llvm/Support/SourceMgr.h"
     29 #include "llvm/Support/raw_ostream.h"
     30 #include <algorithm>
     31 #include <cctype>
     32 #include <map>
     33 #include <string>
     34 #include <system_error>
     35 #include <vector>
     36 using namespace llvm;
     37 
     38 static cl::opt<std::string>
     39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
     40 
     41 static cl::opt<std::string>
     42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
     43               cl::init("-"), cl::value_desc("filename"));
     44 
     45 static cl::list<std::string>
     46 CheckPrefixes("check-prefix",
     47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
     48 
     49 static cl::opt<bool>
     50 NoCanonicalizeWhiteSpace("strict-whitespace",
     51               cl::desc("Do not treat all horizontal whitespace as equivalent"));
     52 
     53 static cl::list<std::string> ImplicitCheckNot(
     54     "implicit-check-not",
     55     cl::desc("Add an implicit negative check with this pattern to every\n"
     56              "positive check. This can be used to ensure that no instances of\n"
     57              "this pattern occur which are not matched by a positive pattern"),
     58     cl::value_desc("pattern"));
     59 
     60 static cl::opt<bool> AllowEmptyInput(
     61     "allow-empty", cl::init(false),
     62     cl::desc("Allow the input file to be empty. This is useful when making\n"
     63              "checks that some error message does not occur, for example."));
     64 
     65 typedef cl::list<std::string>::const_iterator prefix_iterator;
     66 
     67 //===----------------------------------------------------------------------===//
     68 // Pattern Handling Code.
     69 //===----------------------------------------------------------------------===//
     70 
     71 namespace Check {
     72   enum CheckType {
     73     CheckNone = 0,
     74     CheckPlain,
     75     CheckNext,
     76     CheckSame,
     77     CheckNot,
     78     CheckDAG,
     79     CheckLabel,
     80 
     81     /// MatchEOF - When set, this pattern only matches the end of file. This is
     82     /// used for trailing CHECK-NOTs.
     83     CheckEOF
     84   };
     85 }
     86 
     87 class Pattern {
     88   SMLoc PatternLoc;
     89 
     90   Check::CheckType CheckTy;
     91 
     92   /// FixedStr - If non-empty, this pattern is a fixed string match with the
     93   /// specified fixed string.
     94   StringRef FixedStr;
     95 
     96   /// RegEx - If non-empty, this is a regex pattern.
     97   std::string RegExStr;
     98 
     99   /// \brief Contains the number of line this pattern is in.
    100   unsigned LineNumber;
    101 
    102   /// VariableUses - Entries in this vector map to uses of a variable in the
    103   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
    104   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
    105   /// value of bar at offset 3.
    106   std::vector<std::pair<StringRef, unsigned> > VariableUses;
    107 
    108   /// VariableDefs - Maps definitions of variables to their parenthesized
    109   /// capture numbers.
    110   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
    111   std::map<StringRef, unsigned> VariableDefs;
    112 
    113 public:
    114 
    115   Pattern(Check::CheckType Ty)
    116     : CheckTy(Ty) { }
    117 
    118   /// getLoc - Return the location in source code.
    119   SMLoc getLoc() const { return PatternLoc; }
    120 
    121   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
    122   /// which prefix is being matched, SM provides the SourceMgr used for error
    123   /// reports, and LineNumber is the line number in the input file from which
    124   /// the pattern string was read.  Returns true in case of an error, false
    125   /// otherwise.
    126   bool ParsePattern(StringRef PatternStr,
    127                     StringRef Prefix,
    128                     SourceMgr &SM,
    129                     unsigned LineNumber);
    130 
    131   /// Match - Match the pattern string against the input buffer Buffer.  This
    132   /// returns the position that is matched or npos if there is no match.  If
    133   /// there is a match, the size of the matched string is returned in MatchLen.
    134   ///
    135   /// The VariableTable StringMap provides the current values of filecheck
    136   /// variables and is updated if this match defines new values.
    137   size_t Match(StringRef Buffer, size_t &MatchLen,
    138                StringMap<StringRef> &VariableTable) const;
    139 
    140   /// PrintFailureInfo - Print additional information about a failure to match
    141   /// involving this pattern.
    142   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    143                         const StringMap<StringRef> &VariableTable) const;
    144 
    145   bool hasVariable() const { return !(VariableUses.empty() &&
    146                                       VariableDefs.empty()); }
    147 
    148   Check::CheckType getCheckTy() const { return CheckTy; }
    149 
    150 private:
    151   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
    152   void AddBackrefToRegEx(unsigned BackrefNum);
    153 
    154   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
    155   /// matching this pattern at the start of \arg Buffer; a distance of zero
    156   /// should correspond to a perfect match.
    157   unsigned ComputeMatchDistance(StringRef Buffer,
    158                                const StringMap<StringRef> &VariableTable) const;
    159 
    160   /// \brief Evaluates expression and stores the result to \p Value.
    161   /// \return true on success. false when the expression has invalid syntax.
    162   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
    163 
    164   /// \brief Finds the closing sequence of a regex variable usage or
    165   /// definition. Str has to point in the beginning of the definition
    166   /// (right after the opening sequence).
    167   /// \return offset of the closing sequence within Str, or npos if it was not
    168   /// found.
    169   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
    170 };
    171 
    172 
    173 bool Pattern::ParsePattern(StringRef PatternStr,
    174                            StringRef Prefix,
    175                            SourceMgr &SM,
    176                            unsigned LineNumber) {
    177   this->LineNumber = LineNumber;
    178   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
    179 
    180   // Ignore trailing whitespace.
    181   while (!PatternStr.empty() &&
    182          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    183     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
    184 
    185   // Check that there is something on the line.
    186   if (PatternStr.empty()) {
    187     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
    188                     "found empty check string with prefix '" +
    189                     Prefix + ":'");
    190     return true;
    191   }
    192 
    193   // Check to see if this is a fixed string, or if it has regex pieces.
    194   if (PatternStr.size() < 2 ||
    195       (PatternStr.find("{{") == StringRef::npos &&
    196        PatternStr.find("[[") == StringRef::npos)) {
    197     FixedStr = PatternStr;
    198     return false;
    199   }
    200 
    201   // Paren value #0 is for the fully matched string.  Any new parenthesized
    202   // values add from there.
    203   unsigned CurParen = 1;
    204 
    205   // Otherwise, there is at least one regex piece.  Build up the regex pattern
    206   // by escaping scary characters in fixed strings, building up one big regex.
    207   while (!PatternStr.empty()) {
    208     // RegEx matches.
    209     if (PatternStr.startswith("{{")) {
    210       // This is the start of a regex match.  Scan for the }}.
    211       size_t End = PatternStr.find("}}");
    212       if (End == StringRef::npos) {
    213         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    214                         SourceMgr::DK_Error,
    215                         "found start of regex string with no end '}}'");
    216         return true;
    217       }
    218 
    219       // Enclose {{}} patterns in parens just like [[]] even though we're not
    220       // capturing the result for any purpose.  This is required in case the
    221       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
    222       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
    223       RegExStr += '(';
    224       ++CurParen;
    225 
    226       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
    227         return true;
    228       RegExStr += ')';
    229 
    230       PatternStr = PatternStr.substr(End+2);
    231       continue;
    232     }
    233 
    234     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
    235     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
    236     // second form is [[foo]] which is a reference to foo.  The variable name
    237     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
    238     // it.  This is to catch some common errors.
    239     if (PatternStr.startswith("[[")) {
    240       // Find the closing bracket pair ending the match.  End is going to be an
    241       // offset relative to the beginning of the match string.
    242       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
    243 
    244       if (End == StringRef::npos) {
    245         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    246                         SourceMgr::DK_Error,
    247                         "invalid named regex reference, no ]] found");
    248         return true;
    249       }
    250 
    251       StringRef MatchStr = PatternStr.substr(2, End);
    252       PatternStr = PatternStr.substr(End+4);
    253 
    254       // Get the regex name (e.g. "foo").
    255       size_t NameEnd = MatchStr.find(':');
    256       StringRef Name = MatchStr.substr(0, NameEnd);
    257 
    258       if (Name.empty()) {
    259         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    260                         "invalid name in named regex: empty name");
    261         return true;
    262       }
    263 
    264       // Verify that the name/expression is well formed. FileCheck currently
    265       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
    266       // is relaxed, more strict check is performed in \c EvaluateExpression.
    267       bool IsExpression = false;
    268       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    269         if (i == 0 && Name[i] == '@') {
    270           if (NameEnd != StringRef::npos) {
    271             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    272                             SourceMgr::DK_Error,
    273                             "invalid name in named regex definition");
    274             return true;
    275           }
    276           IsExpression = true;
    277           continue;
    278         }
    279         if (Name[i] != '_' && !isalnum(Name[i]) &&
    280             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
    281           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
    282                           SourceMgr::DK_Error, "invalid name in named regex");
    283           return true;
    284         }
    285       }
    286 
    287       // Name can't start with a digit.
    288       if (isdigit(static_cast<unsigned char>(Name[0]))) {
    289         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    290                         "invalid name in named regex");
    291         return true;
    292       }
    293 
    294       // Handle [[foo]].
    295       if (NameEnd == StringRef::npos) {
    296         // Handle variables that were defined earlier on the same line by
    297         // emitting a backreference.
    298         if (VariableDefs.find(Name) != VariableDefs.end()) {
    299           unsigned VarParenNum = VariableDefs[Name];
    300           if (VarParenNum < 1 || VarParenNum > 9) {
    301             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    302                             SourceMgr::DK_Error,
    303                             "Can't back-reference more than 9 variables");
    304             return true;
    305           }
    306           AddBackrefToRegEx(VarParenNum);
    307         } else {
    308           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
    309         }
    310         continue;
    311       }
    312 
    313       // Handle [[foo:.*]].
    314       VariableDefs[Name] = CurParen;
    315       RegExStr += '(';
    316       ++CurParen;
    317 
    318       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
    319         return true;
    320 
    321       RegExStr += ')';
    322     }
    323 
    324     // Handle fixed string matches.
    325     // Find the end, which is the start of the next regex.
    326     size_t FixedMatchEnd = PatternStr.find("{{");
    327     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    328     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    329     PatternStr = PatternStr.substr(FixedMatchEnd);
    330   }
    331 
    332   return false;
    333 }
    334 
    335 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
    336                               SourceMgr &SM) {
    337   Regex R(RS);
    338   std::string Error;
    339   if (!R.isValid(Error)) {
    340     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
    341                     "invalid regex: " + Error);
    342     return true;
    343   }
    344 
    345   RegExStr += RS.str();
    346   CurParen += R.getNumMatches();
    347   return false;
    348 }
    349 
    350 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
    351   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
    352   std::string Backref = std::string("\\") +
    353                         std::string(1, '0' + BackrefNum);
    354   RegExStr += Backref;
    355 }
    356 
    357 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
    358   // The only supported expression is @LINE([\+-]\d+)?
    359   if (!Expr.startswith("@LINE"))
    360     return false;
    361   Expr = Expr.substr(StringRef("@LINE").size());
    362   int Offset = 0;
    363   if (!Expr.empty()) {
    364     if (Expr[0] == '+')
    365       Expr = Expr.substr(1);
    366     else if (Expr[0] != '-')
    367       return false;
    368     if (Expr.getAsInteger(10, Offset))
    369       return false;
    370   }
    371   Value = llvm::itostr(LineNumber + Offset);
    372   return true;
    373 }
    374 
    375 /// Match - Match the pattern string against the input buffer Buffer.  This
    376 /// returns the position that is matched or npos if there is no match.  If
    377 /// there is a match, the size of the matched string is returned in MatchLen.
    378 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
    379                       StringMap<StringRef> &VariableTable) const {
    380   // If this is the EOF pattern, match it immediately.
    381   if (CheckTy == Check::CheckEOF) {
    382     MatchLen = 0;
    383     return Buffer.size();
    384   }
    385 
    386   // If this is a fixed string pattern, just match it now.
    387   if (!FixedStr.empty()) {
    388     MatchLen = FixedStr.size();
    389     return Buffer.find(FixedStr);
    390   }
    391 
    392   // Regex match.
    393 
    394   // If there are variable uses, we need to create a temporary string with the
    395   // actual value.
    396   StringRef RegExToMatch = RegExStr;
    397   std::string TmpStr;
    398   if (!VariableUses.empty()) {
    399     TmpStr = RegExStr;
    400 
    401     unsigned InsertOffset = 0;
    402     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    403       std::string Value;
    404 
    405       if (VariableUses[i].first[0] == '@') {
    406         if (!EvaluateExpression(VariableUses[i].first, Value))
    407           return StringRef::npos;
    408       } else {
    409         StringMap<StringRef>::iterator it =
    410           VariableTable.find(VariableUses[i].first);
    411         // If the variable is undefined, return an error.
    412         if (it == VariableTable.end())
    413           return StringRef::npos;
    414 
    415         // Look up the value and escape it so that we can put it into the regex.
    416         Value += Regex::escape(it->second);
    417       }
    418 
    419       // Plop it into the regex at the adjusted offset.
    420       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
    421                     Value.begin(), Value.end());
    422       InsertOffset += Value.size();
    423     }
    424 
    425     // Match the newly constructed regex.
    426     RegExToMatch = TmpStr;
    427   }
    428 
    429 
    430   SmallVector<StringRef, 4> MatchInfo;
    431   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
    432     return StringRef::npos;
    433 
    434   // Successful regex match.
    435   assert(!MatchInfo.empty() && "Didn't get any match");
    436   StringRef FullMatch = MatchInfo[0];
    437 
    438   // If this defines any variables, remember their values.
    439   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
    440                                                      E = VariableDefs.end();
    441        I != E; ++I) {
    442     assert(I->second < MatchInfo.size() && "Internal paren error");
    443     VariableTable[I->first] = MatchInfo[I->second];
    444   }
    445 
    446   MatchLen = FullMatch.size();
    447   return FullMatch.data()-Buffer.data();
    448 }
    449 
    450 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
    451                               const StringMap<StringRef> &VariableTable) const {
    452   // Just compute the number of matching characters. For regular expressions, we
    453   // just compare against the regex itself and hope for the best.
    454   //
    455   // FIXME: One easy improvement here is have the regex lib generate a single
    456   // example regular expression which matches, and use that as the example
    457   // string.
    458   StringRef ExampleString(FixedStr);
    459   if (ExampleString.empty())
    460     ExampleString = RegExStr;
    461 
    462   // Only compare up to the first line in the buffer, or the string size.
    463   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
    464   BufferPrefix = BufferPrefix.split('\n').first;
    465   return BufferPrefix.edit_distance(ExampleString);
    466 }
    467 
    468 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    469                                const StringMap<StringRef> &VariableTable) const{
    470   // If this was a regular expression using variables, print the current
    471   // variable values.
    472   if (!VariableUses.empty()) {
    473     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    474       SmallString<256> Msg;
    475       raw_svector_ostream OS(Msg);
    476       StringRef Var = VariableUses[i].first;
    477       if (Var[0] == '@') {
    478         std::string Value;
    479         if (EvaluateExpression(Var, Value)) {
    480           OS << "with expression \"";
    481           OS.write_escaped(Var) << "\" equal to \"";
    482           OS.write_escaped(Value) << "\"";
    483         } else {
    484           OS << "uses incorrect expression \"";
    485           OS.write_escaped(Var) << "\"";
    486         }
    487       } else {
    488         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
    489 
    490         // Check for undefined variable references.
    491         if (it == VariableTable.end()) {
    492           OS << "uses undefined variable \"";
    493           OS.write_escaped(Var) << "\"";
    494         } else {
    495           OS << "with variable \"";
    496           OS.write_escaped(Var) << "\" equal to \"";
    497           OS.write_escaped(it->second) << "\"";
    498         }
    499       }
    500 
    501       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    502                       OS.str());
    503     }
    504   }
    505 
    506   // Attempt to find the closest/best fuzzy match.  Usually an error happens
    507   // because some string in the output didn't exactly match. In these cases, we
    508   // would like to show the user a best guess at what "should have" matched, to
    509   // save them having to actually check the input manually.
    510   size_t NumLinesForward = 0;
    511   size_t Best = StringRef::npos;
    512   double BestQuality = 0;
    513 
    514   // Use an arbitrary 4k limit on how far we will search.
    515   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
    516     if (Buffer[i] == '\n')
    517       ++NumLinesForward;
    518 
    519     // Patterns have leading whitespace stripped, so skip whitespace when
    520     // looking for something which looks like a pattern.
    521     if (Buffer[i] == ' ' || Buffer[i] == '\t')
    522       continue;
    523 
    524     // Compute the "quality" of this match as an arbitrary combination of the
    525     // match distance and the number of lines skipped to get to this match.
    526     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
    527     double Quality = Distance + (NumLinesForward / 100.);
    528 
    529     if (Quality < BestQuality || Best == StringRef::npos) {
    530       Best = i;
    531       BestQuality = Quality;
    532     }
    533   }
    534 
    535   // Print the "possible intended match here" line if we found something
    536   // reasonable and not equal to what we showed in the "scanning from here"
    537   // line.
    538   if (Best && Best != StringRef::npos && BestQuality < 50) {
    539       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
    540                       SourceMgr::DK_Note, "possible intended match here");
    541 
    542     // FIXME: If we wanted to be really friendly we would show why the match
    543     // failed, as it can be hard to spot simple one character differences.
    544   }
    545 }
    546 
    547 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
    548   // Offset keeps track of the current offset within the input Str
    549   size_t Offset = 0;
    550   // [...] Nesting depth
    551   size_t BracketDepth = 0;
    552 
    553   while (!Str.empty()) {
    554     if (Str.startswith("]]") && BracketDepth == 0)
    555       return Offset;
    556     if (Str[0] == '\\') {
    557       // Backslash escapes the next char within regexes, so skip them both.
    558       Str = Str.substr(2);
    559       Offset += 2;
    560     } else {
    561       switch (Str[0]) {
    562         default:
    563           break;
    564         case '[':
    565           BracketDepth++;
    566           break;
    567         case ']':
    568           if (BracketDepth == 0) {
    569             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
    570                             SourceMgr::DK_Error,
    571                             "missing closing \"]\" for regex variable");
    572             exit(1);
    573           }
    574           BracketDepth--;
    575           break;
    576       }
    577       Str = Str.substr(1);
    578       Offset++;
    579     }
    580   }
    581 
    582   return StringRef::npos;
    583 }
    584 
    585 
    586 //===----------------------------------------------------------------------===//
    587 // Check Strings.
    588 //===----------------------------------------------------------------------===//
    589 
    590 /// CheckString - This is a check that we found in the input file.
    591 struct CheckString {
    592   /// Pat - The pattern to match.
    593   Pattern Pat;
    594 
    595   /// Prefix - Which prefix name this check matched.
    596   StringRef Prefix;
    597 
    598   /// Loc - The location in the match file that the check string was specified.
    599   SMLoc Loc;
    600 
    601   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
    602   /// as opposed to a CHECK: directive.
    603   Check::CheckType CheckTy;
    604 
    605   /// DagNotStrings - These are all of the strings that are disallowed from
    606   /// occurring between this match string and the previous one (or start of
    607   /// file).
    608   std::vector<Pattern> DagNotStrings;
    609 
    610 
    611   CheckString(const Pattern &P,
    612               StringRef S,
    613               SMLoc L,
    614               Check::CheckType Ty)
    615     : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
    616 
    617   /// Check - Match check string and its "not strings" and/or "dag strings".
    618   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
    619                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
    620 
    621   /// CheckNext - Verify there is a single line in the given buffer.
    622   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
    623 
    624   /// CheckSame - Verify there is no newline in the given buffer.
    625   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
    626 
    627   /// CheckNot - Verify there's no "not strings" in the given buffer.
    628   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
    629                 const std::vector<const Pattern *> &NotStrings,
    630                 StringMap<StringRef> &VariableTable) const;
    631 
    632   /// CheckDag - Match "dag strings" and their mixed "not strings".
    633   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
    634                   std::vector<const Pattern *> &NotStrings,
    635                   StringMap<StringRef> &VariableTable) const;
    636 };
    637 
    638 /// Canonicalize whitespaces in the input file. Line endings are replaced
    639 /// with UNIX-style '\n'.
    640 ///
    641 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
    642 /// characters to a single space.
    643 static std::unique_ptr<MemoryBuffer>
    644 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
    645                       bool PreserveHorizontal) {
    646   SmallString<128> NewFile;
    647   NewFile.reserve(MB->getBufferSize());
    648 
    649   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
    650        Ptr != End; ++Ptr) {
    651     // Eliminate trailing dosish \r.
    652     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
    653       continue;
    654     }
    655 
    656     // If current char is not a horizontal whitespace or if horizontal
    657     // whitespace canonicalization is disabled, dump it to output as is.
    658     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
    659       NewFile.push_back(*Ptr);
    660       continue;
    661     }
    662 
    663     // Otherwise, add one space and advance over neighboring space.
    664     NewFile.push_back(' ');
    665     while (Ptr+1 != End &&
    666            (Ptr[1] == ' ' || Ptr[1] == '\t'))
    667       ++Ptr;
    668   }
    669 
    670   return std::unique_ptr<MemoryBuffer>(
    671       MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
    672 }
    673 
    674 static bool IsPartOfWord(char c) {
    675   return (isalnum(c) || c == '-' || c == '_');
    676 }
    677 
    678 // Get the size of the prefix extension.
    679 static size_t CheckTypeSize(Check::CheckType Ty) {
    680   switch (Ty) {
    681   case Check::CheckNone:
    682     return 0;
    683 
    684   case Check::CheckPlain:
    685     return sizeof(":") - 1;
    686 
    687   case Check::CheckNext:
    688     return sizeof("-NEXT:") - 1;
    689 
    690   case Check::CheckSame:
    691     return sizeof("-SAME:") - 1;
    692 
    693   case Check::CheckNot:
    694     return sizeof("-NOT:") - 1;
    695 
    696   case Check::CheckDAG:
    697     return sizeof("-DAG:") - 1;
    698 
    699   case Check::CheckLabel:
    700     return sizeof("-LABEL:") - 1;
    701 
    702   case Check::CheckEOF:
    703     llvm_unreachable("Should not be using EOF size");
    704   }
    705 
    706   llvm_unreachable("Bad check type");
    707 }
    708 
    709 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
    710   char NextChar = Buffer[Prefix.size()];
    711 
    712   // Verify that the : is present after the prefix.
    713   if (NextChar == ':')
    714     return Check::CheckPlain;
    715 
    716   if (NextChar != '-')
    717     return Check::CheckNone;
    718 
    719   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
    720   if (Rest.startswith("NEXT:"))
    721     return Check::CheckNext;
    722 
    723   if (Rest.startswith("SAME:"))
    724     return Check::CheckSame;
    725 
    726   if (Rest.startswith("NOT:"))
    727     return Check::CheckNot;
    728 
    729   if (Rest.startswith("DAG:"))
    730     return Check::CheckDAG;
    731 
    732   if (Rest.startswith("LABEL:"))
    733     return Check::CheckLabel;
    734 
    735   return Check::CheckNone;
    736 }
    737 
    738 // From the given position, find the next character after the word.
    739 static size_t SkipWord(StringRef Str, size_t Loc) {
    740   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
    741     ++Loc;
    742   return Loc;
    743 }
    744 
    745 // Try to find the first match in buffer for any prefix. If a valid match is
    746 // found, return that prefix and set its type and location.  If there are almost
    747 // matches (e.g. the actual prefix string is found, but is not an actual check
    748 // string), but no valid match, return an empty string and set the position to
    749 // resume searching from. If no partial matches are found, return an empty
    750 // string and the location will be StringRef::npos. If one prefix is a substring
    751 // of another, the maximal match should be found. e.g. if "A" and "AA" are
    752 // prefixes then AA-CHECK: should match the second one.
    753 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
    754                                          Check::CheckType &CheckTy,
    755                                          size_t &CheckLoc) {
    756   StringRef FirstPrefix;
    757   size_t FirstLoc = StringRef::npos;
    758   size_t SearchLoc = StringRef::npos;
    759   Check::CheckType FirstTy = Check::CheckNone;
    760 
    761   CheckTy = Check::CheckNone;
    762   CheckLoc = StringRef::npos;
    763 
    764   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
    765        I != E; ++I) {
    766     StringRef Prefix(*I);
    767     size_t PrefixLoc = Buffer.find(Prefix);
    768 
    769     if (PrefixLoc == StringRef::npos)
    770       continue;
    771 
    772     // Track where we are searching for invalid prefixes that look almost right.
    773     // We need to only advance to the first partial match on the next attempt
    774     // since a partial match could be a substring of a later, valid prefix.
    775     // Need to skip to the end of the word, otherwise we could end up
    776     // matching a prefix in a substring later.
    777     if (PrefixLoc < SearchLoc)
    778       SearchLoc = SkipWord(Buffer, PrefixLoc);
    779 
    780     // We only want to find the first match to avoid skipping some.
    781     if (PrefixLoc > FirstLoc)
    782       continue;
    783     // If one matching check-prefix is a prefix of another, choose the
    784     // longer one.
    785     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
    786       continue;
    787 
    788     StringRef Rest = Buffer.drop_front(PrefixLoc);
    789     // Make sure we have actually found the prefix, and not a word containing
    790     // it. This should also prevent matching the wrong prefix when one is a
    791     // substring of another.
    792     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
    793       FirstTy = Check::CheckNone;
    794     else
    795       FirstTy = FindCheckType(Rest, Prefix);
    796 
    797     FirstLoc = PrefixLoc;
    798     FirstPrefix = Prefix;
    799   }
    800 
    801   // If the first prefix is invalid, we should continue the search after it.
    802   if (FirstTy == Check::CheckNone) {
    803     CheckLoc = SearchLoc;
    804     return "";
    805   }
    806 
    807   CheckTy = FirstTy;
    808   CheckLoc = FirstLoc;
    809   return FirstPrefix;
    810 }
    811 
    812 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
    813                                          unsigned &LineNumber,
    814                                          Check::CheckType &CheckTy,
    815                                          size_t &CheckLoc) {
    816   while (!Buffer.empty()) {
    817     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
    818     // If we found a real match, we are done.
    819     if (!Prefix.empty()) {
    820       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
    821       return Prefix;
    822     }
    823 
    824     // We didn't find any almost matches either, we are also done.
    825     if (CheckLoc == StringRef::npos)
    826       return StringRef();
    827 
    828     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
    829 
    830     // Advance to the last possible match we found and try again.
    831     Buffer = Buffer.drop_front(CheckLoc + 1);
    832   }
    833 
    834   return StringRef();
    835 }
    836 
    837 /// ReadCheckFile - Read the check file, which specifies the sequence of
    838 /// expected strings.  The strings are added to the CheckStrings vector.
    839 /// Returns true in case of an error, false otherwise.
    840 static bool ReadCheckFile(SourceMgr &SM,
    841                           std::vector<CheckString> &CheckStrings) {
    842   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
    843       MemoryBuffer::getFileOrSTDIN(CheckFilename);
    844   if (std::error_code EC = FileOrErr.getError()) {
    845     errs() << "Could not open check file '" << CheckFilename
    846            << "': " << EC.message() << '\n';
    847     return true;
    848   }
    849 
    850   // If we want to canonicalize whitespace, strip excess whitespace from the
    851   // buffer containing the CHECK lines. Remove DOS style line endings.
    852   std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
    853       std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
    854 
    855   // Find all instances of CheckPrefix followed by : in the file.
    856   StringRef Buffer = F->getBuffer();
    857 
    858   SM.AddNewSourceBuffer(std::move(F), SMLoc());
    859 
    860   std::vector<Pattern> ImplicitNegativeChecks;
    861   for (const auto &PatternString : ImplicitCheckNot) {
    862     // Create a buffer with fake command line content in order to display the
    863     // command line option responsible for the specific implicit CHECK-NOT.
    864     std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
    865     std::string Suffix = "'";
    866     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
    867         Prefix + PatternString + Suffix, "command line");
    868 
    869     StringRef PatternInBuffer =
    870         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
    871     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
    872 
    873     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
    874     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
    875                                                "IMPLICIT-CHECK", SM, 0);
    876   }
    877 
    878 
    879   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
    880 
    881   // LineNumber keeps track of the line on which CheckPrefix instances are
    882   // found.
    883   unsigned LineNumber = 1;
    884 
    885   while (1) {
    886     Check::CheckType CheckTy;
    887     size_t PrefixLoc;
    888 
    889     // See if a prefix occurs in the memory buffer.
    890     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
    891                                                    LineNumber,
    892                                                    CheckTy,
    893                                                    PrefixLoc);
    894     if (UsedPrefix.empty())
    895       break;
    896 
    897     Buffer = Buffer.drop_front(PrefixLoc);
    898 
    899     // Location to use for error messages.
    900     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
    901 
    902     // PrefixLoc is to the start of the prefix. Skip to the end.
    903     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
    904 
    905     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
    906     // leading and trailing whitespace.
    907     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
    908 
    909     // Scan ahead to the end of line.
    910     size_t EOL = Buffer.find_first_of("\n\r");
    911 
    912     // Remember the location of the start of the pattern, for diagnostics.
    913     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
    914 
    915     // Parse the pattern.
    916     Pattern P(CheckTy);
    917     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
    918       return true;
    919 
    920     // Verify that CHECK-LABEL lines do not define or use variables
    921     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
    922       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
    923                       SourceMgr::DK_Error,
    924                       "found '" + UsedPrefix + "-LABEL:'"
    925                       " with variable definition or use");
    926       return true;
    927     }
    928 
    929     Buffer = Buffer.substr(EOL);
    930 
    931     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
    932     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
    933         CheckStrings.empty()) {
    934       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
    935       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
    936                       SourceMgr::DK_Error,
    937                       "found '" + UsedPrefix + "-" + Type + "' without previous '"
    938                       + UsedPrefix + ": line");
    939       return true;
    940     }
    941 
    942     // Handle CHECK-DAG/-NOT.
    943     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
    944       DagNotMatches.push_back(P);
    945       continue;
    946     }
    947 
    948     // Okay, add the string we captured to the output vector and move on.
    949     CheckStrings.push_back(CheckString(P,
    950                                        UsedPrefix,
    951                                        PatternLoc,
    952                                        CheckTy));
    953     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    954     DagNotMatches = ImplicitNegativeChecks;
    955   }
    956 
    957   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
    958   // prefix as a filler for the error message.
    959   if (!DagNotMatches.empty()) {
    960     CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
    961                                        CheckPrefixes[0],
    962                                        SMLoc::getFromPointer(Buffer.data()),
    963                                        Check::CheckEOF));
    964     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    965   }
    966 
    967   if (CheckStrings.empty()) {
    968     errs() << "error: no check strings found with prefix"
    969            << (CheckPrefixes.size() > 1 ? "es " : " ");
    970     for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
    971       StringRef Prefix(CheckPrefixes[I]);
    972       errs() << '\'' << Prefix << ":'";
    973       if (I != N - 1)
    974         errs() << ", ";
    975     }
    976 
    977     errs() << '\n';
    978     return true;
    979   }
    980 
    981   return false;
    982 }
    983 
    984 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
    985                              const Pattern &Pat, StringRef Buffer,
    986                              StringMap<StringRef> &VariableTable) {
    987   // Otherwise, we have an error, emit an error message.
    988   SM.PrintMessage(Loc, SourceMgr::DK_Error,
    989                   "expected string not found in input");
    990 
    991   // Print the "scanning from here" line.  If the current position is at the
    992   // end of a line, advance to the start of the next line.
    993   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
    994 
    995   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    996                   "scanning from here");
    997 
    998   // Allow the pattern to print additional information if desired.
    999   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
   1000 }
   1001 
   1002 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
   1003                              StringRef Buffer,
   1004                              StringMap<StringRef> &VariableTable) {
   1005   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
   1006 }
   1007 
   1008 /// CountNumNewlinesBetween - Count the number of newlines in the specified
   1009 /// range.
   1010 static unsigned CountNumNewlinesBetween(StringRef Range,
   1011                                         const char *&FirstNewLine) {
   1012   unsigned NumNewLines = 0;
   1013   while (1) {
   1014     // Scan for newline.
   1015     Range = Range.substr(Range.find_first_of("\n\r"));
   1016     if (Range.empty()) return NumNewLines;
   1017 
   1018     ++NumNewLines;
   1019 
   1020     // Handle \n\r and \r\n as a single newline.
   1021     if (Range.size() > 1 &&
   1022         (Range[1] == '\n' || Range[1] == '\r') &&
   1023         (Range[0] != Range[1]))
   1024       Range = Range.substr(1);
   1025     Range = Range.substr(1);
   1026 
   1027     if (NumNewLines == 1)
   1028       FirstNewLine = Range.begin();
   1029   }
   1030 }
   1031 
   1032 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
   1033                           bool IsLabelScanMode, size_t &MatchLen,
   1034                           StringMap<StringRef> &VariableTable) const {
   1035   size_t LastPos = 0;
   1036   std::vector<const Pattern *> NotStrings;
   1037 
   1038   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
   1039   // bounds; we have not processed variable definitions within the bounded block
   1040   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
   1041   // over the block again (including the last CHECK-LABEL) in normal mode.
   1042   if (!IsLabelScanMode) {
   1043     // Match "dag strings" (with mixed "not strings" if any).
   1044     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
   1045     if (LastPos == StringRef::npos)
   1046       return StringRef::npos;
   1047   }
   1048 
   1049   // Match itself from the last position after matching CHECK-DAG.
   1050   StringRef MatchBuffer = Buffer.substr(LastPos);
   1051   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1052   if (MatchPos == StringRef::npos) {
   1053     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
   1054     return StringRef::npos;
   1055   }
   1056 
   1057   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
   1058   // or CHECK-NOT
   1059   if (!IsLabelScanMode) {
   1060     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1061 
   1062     // If this check is a "CHECK-NEXT", verify that the previous match was on
   1063     // the previous line (i.e. that there is one newline between them).
   1064     if (CheckNext(SM, SkippedRegion))
   1065       return StringRef::npos;
   1066 
   1067     // If this check is a "CHECK-SAME", verify that the previous match was on
   1068     // the same line (i.e. that there is no newline between them).
   1069     if (CheckSame(SM, SkippedRegion))
   1070       return StringRef::npos;
   1071 
   1072     // If this match had "not strings", verify that they don't exist in the
   1073     // skipped region.
   1074     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1075       return StringRef::npos;
   1076   }
   1077 
   1078   return LastPos + MatchPos;
   1079 }
   1080 
   1081 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
   1082   if (CheckTy != Check::CheckNext)
   1083     return false;
   1084 
   1085   // Count the number of newlines between the previous match and this one.
   1086   assert(Buffer.data() !=
   1087          SM.getMemoryBuffer(
   1088            SM.FindBufferContainingLoc(
   1089              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
   1090          "CHECK-NEXT can't be the first check in a file");
   1091 
   1092   const char *FirstNewLine = nullptr;
   1093   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
   1094 
   1095   if (NumNewLines == 0) {
   1096     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
   1097                     "-NEXT: is on the same line as previous match");
   1098     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
   1099                     SourceMgr::DK_Note, "'next' match was here");
   1100     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1101                     "previous match ended here");
   1102     return true;
   1103   }
   1104 
   1105   if (NumNewLines != 1) {
   1106     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
   1107                     "-NEXT: is not on the line after the previous match");
   1108     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
   1109                     SourceMgr::DK_Note, "'next' match was here");
   1110     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1111                     "previous match ended here");
   1112     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
   1113                     "non-matching line after previous match is here");
   1114     return true;
   1115   }
   1116 
   1117   return false;
   1118 }
   1119 
   1120 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
   1121   if (CheckTy != Check::CheckSame)
   1122     return false;
   1123 
   1124   // Count the number of newlines between the previous match and this one.
   1125   assert(Buffer.data() !=
   1126              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
   1127                                     SMLoc::getFromPointer(Buffer.data())))
   1128                  ->getBufferStart() &&
   1129          "CHECK-SAME can't be the first check in a file");
   1130 
   1131   const char *FirstNewLine = nullptr;
   1132   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
   1133 
   1134   if (NumNewLines != 0) {
   1135     SM.PrintMessage(Loc, SourceMgr::DK_Error,
   1136                     Prefix +
   1137                         "-SAME: is not on the same line as the previous match");
   1138     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
   1139                     "'next' match was here");
   1140     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1141                     "previous match ended here");
   1142     return true;
   1143   }
   1144 
   1145   return false;
   1146 }
   1147 
   1148 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
   1149                            const std::vector<const Pattern *> &NotStrings,
   1150                            StringMap<StringRef> &VariableTable) const {
   1151   for (unsigned ChunkNo = 0, e = NotStrings.size();
   1152        ChunkNo != e; ++ChunkNo) {
   1153     const Pattern *Pat = NotStrings[ChunkNo];
   1154     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
   1155 
   1156     size_t MatchLen = 0;
   1157     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
   1158 
   1159     if (Pos == StringRef::npos) continue;
   1160 
   1161     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
   1162                     SourceMgr::DK_Error,
   1163                     Prefix + "-NOT: string occurred!");
   1164     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
   1165                     Prefix + "-NOT: pattern specified here");
   1166     return true;
   1167   }
   1168 
   1169   return false;
   1170 }
   1171 
   1172 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
   1173                              std::vector<const Pattern *> &NotStrings,
   1174                              StringMap<StringRef> &VariableTable) const {
   1175   if (DagNotStrings.empty())
   1176     return 0;
   1177 
   1178   size_t LastPos = 0;
   1179   size_t StartPos = LastPos;
   1180 
   1181   for (unsigned ChunkNo = 0, e = DagNotStrings.size();
   1182        ChunkNo != e; ++ChunkNo) {
   1183     const Pattern &Pat = DagNotStrings[ChunkNo];
   1184 
   1185     assert((Pat.getCheckTy() == Check::CheckDAG ||
   1186             Pat.getCheckTy() == Check::CheckNot) &&
   1187            "Invalid CHECK-DAG or CHECK-NOT!");
   1188 
   1189     if (Pat.getCheckTy() == Check::CheckNot) {
   1190       NotStrings.push_back(&Pat);
   1191       continue;
   1192     }
   1193 
   1194     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
   1195 
   1196     size_t MatchLen = 0, MatchPos;
   1197 
   1198     // CHECK-DAG always matches from the start.
   1199     StringRef MatchBuffer = Buffer.substr(StartPos);
   1200     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1201     // With a group of CHECK-DAGs, a single mismatching means the match on
   1202     // that group of CHECK-DAGs fails immediately.
   1203     if (MatchPos == StringRef::npos) {
   1204       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
   1205       return StringRef::npos;
   1206     }
   1207     // Re-calc it as the offset relative to the start of the original string.
   1208     MatchPos += StartPos;
   1209 
   1210     if (!NotStrings.empty()) {
   1211       if (MatchPos < LastPos) {
   1212         // Reordered?
   1213         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
   1214                         SourceMgr::DK_Error,
   1215                         Prefix + "-DAG: found a match of CHECK-DAG"
   1216                         " reordering across a CHECK-NOT");
   1217         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
   1218                         SourceMgr::DK_Note,
   1219                         Prefix + "-DAG: the farthest match of CHECK-DAG"
   1220                         " is found here");
   1221         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
   1222                         Prefix + "-NOT: the crossed pattern specified"
   1223                         " here");
   1224         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
   1225                         Prefix + "-DAG: the reordered pattern specified"
   1226                         " here");
   1227         return StringRef::npos;
   1228       }
   1229       // All subsequent CHECK-DAGs should be matched from the farthest
   1230       // position of all precedent CHECK-DAGs (including this one.)
   1231       StartPos = LastPos;
   1232       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
   1233       // CHECK-DAG, verify that there's no 'not' strings occurred in that
   1234       // region.
   1235       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1236       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1237         return StringRef::npos;
   1238       // Clear "not strings".
   1239       NotStrings.clear();
   1240     }
   1241 
   1242     // Update the last position with CHECK-DAG matches.
   1243     LastPos = std::max(MatchPos + MatchLen, LastPos);
   1244   }
   1245 
   1246   return LastPos;
   1247 }
   1248 
   1249 // A check prefix must contain only alphanumeric, hyphens and underscores.
   1250 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
   1251   Regex Validator("^[a-zA-Z0-9_-]*$");
   1252   return Validator.match(CheckPrefix);
   1253 }
   1254 
   1255 static bool ValidateCheckPrefixes() {
   1256   StringSet<> PrefixSet;
   1257 
   1258   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
   1259        I != E; ++I) {
   1260     StringRef Prefix(*I);
   1261 
   1262     // Reject empty prefixes.
   1263     if (Prefix == "")
   1264       return false;
   1265 
   1266     if (!PrefixSet.insert(Prefix).second)
   1267       return false;
   1268 
   1269     if (!ValidateCheckPrefix(Prefix))
   1270       return false;
   1271   }
   1272 
   1273   return true;
   1274 }
   1275 
   1276 // I don't think there's a way to specify an initial value for cl::list,
   1277 // so if nothing was specified, add the default
   1278 static void AddCheckPrefixIfNeeded() {
   1279   if (CheckPrefixes.empty())
   1280     CheckPrefixes.push_back("CHECK");
   1281 }
   1282 
   1283 int main(int argc, char **argv) {
   1284   sys::PrintStackTraceOnErrorSignal();
   1285   PrettyStackTraceProgram X(argc, argv);
   1286   cl::ParseCommandLineOptions(argc, argv);
   1287 
   1288   if (!ValidateCheckPrefixes()) {
   1289     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
   1290               "start with a letter and contain only alphanumeric characters, "
   1291               "hyphens and underscores\n";
   1292     return 2;
   1293   }
   1294 
   1295   AddCheckPrefixIfNeeded();
   1296 
   1297   SourceMgr SM;
   1298 
   1299   // Read the expected strings from the check file.
   1300   std::vector<CheckString> CheckStrings;
   1301   if (ReadCheckFile(SM, CheckStrings))
   1302     return 2;
   1303 
   1304   // Open the file to check and add it to SourceMgr.
   1305   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
   1306       MemoryBuffer::getFileOrSTDIN(InputFilename);
   1307   if (std::error_code EC = FileOrErr.getError()) {
   1308     errs() << "Could not open input file '" << InputFilename
   1309            << "': " << EC.message() << '\n';
   1310     return 2;
   1311   }
   1312   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
   1313 
   1314   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
   1315     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
   1316     return 2;
   1317   }
   1318 
   1319   // Remove duplicate spaces in the input file if requested.
   1320   // Remove DOS style line endings.
   1321   std::unique_ptr<MemoryBuffer> F =
   1322       CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
   1323 
   1324   // Check that we have all of the expected strings, in order, in the input
   1325   // file.
   1326   StringRef Buffer = F->getBuffer();
   1327 
   1328   SM.AddNewSourceBuffer(std::move(F), SMLoc());
   1329 
   1330   /// VariableTable - This holds all the current filecheck variables.
   1331   StringMap<StringRef> VariableTable;
   1332 
   1333   bool hasError = false;
   1334 
   1335   unsigned i = 0, j = 0, e = CheckStrings.size();
   1336 
   1337   while (true) {
   1338     StringRef CheckRegion;
   1339     if (j == e) {
   1340       CheckRegion = Buffer;
   1341     } else {
   1342       const CheckString &CheckLabelStr = CheckStrings[j];
   1343       if (CheckLabelStr.CheckTy != Check::CheckLabel) {
   1344         ++j;
   1345         continue;
   1346       }
   1347 
   1348       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
   1349       size_t MatchLabelLen = 0;
   1350       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
   1351                                                  MatchLabelLen, VariableTable);
   1352       if (MatchLabelPos == StringRef::npos) {
   1353         hasError = true;
   1354         break;
   1355       }
   1356 
   1357       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
   1358       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
   1359       ++j;
   1360     }
   1361 
   1362     for ( ; i != j; ++i) {
   1363       const CheckString &CheckStr = CheckStrings[i];
   1364 
   1365       // Check each string within the scanned region, including a second check
   1366       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
   1367       size_t MatchLen = 0;
   1368       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
   1369                                        VariableTable);
   1370 
   1371       if (MatchPos == StringRef::npos) {
   1372         hasError = true;
   1373         i = j;
   1374         break;
   1375       }
   1376 
   1377       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
   1378     }
   1379 
   1380     if (j == e)
   1381       break;
   1382   }
   1383 
   1384   return hasError ? 1 : 0;
   1385 }
   1386