Home | History | Annotate | Download | only in FileCheck
      1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // FileCheck does a line-by line check of a file that validates whether it
     11 // contains the expected content.  This is useful for regression tests etc.
     12 //
     13 // This program exits with an error status of 2 on error, exit status of 0 if
     14 // the file matched the expected contents, and exit status of 1 if it did not
     15 // contain the expected contents.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/ADT/SmallString.h"
     20 #include "llvm/ADT/StringExtras.h"
     21 #include "llvm/ADT/StringMap.h"
     22 #include "llvm/ADT/StringSet.h"
     23 #include "llvm/Support/CommandLine.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/PrettyStackTrace.h"
     26 #include "llvm/Support/Regex.h"
     27 #include "llvm/Support/Signals.h"
     28 #include "llvm/Support/SourceMgr.h"
     29 #include "llvm/Support/raw_ostream.h"
     30 #include <algorithm>
     31 #include <cctype>
     32 #include <map>
     33 #include <string>
     34 #include <system_error>
     35 #include <vector>
     36 using namespace llvm;
     37 
     38 static cl::opt<std::string>
     39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
     40 
     41 static cl::opt<std::string>
     42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
     43               cl::init("-"), cl::value_desc("filename"));
     44 
     45 static cl::list<std::string>
     46 CheckPrefixes("check-prefix",
     47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
     48 
     49 static cl::opt<bool>
     50 NoCanonicalizeWhiteSpace("strict-whitespace",
     51               cl::desc("Do not treat all horizontal whitespace as equivalent"));
     52 
     53 static cl::list<std::string> ImplicitCheckNot(
     54     "implicit-check-not",
     55     cl::desc("Add an implicit negative check with this pattern to every\n"
     56              "positive check. This can be used to ensure that no instances of\n"
     57              "this pattern occur which are not matched by a positive pattern"),
     58     cl::value_desc("pattern"));
     59 
     60 static cl::opt<bool> AllowEmptyInput(
     61     "allow-empty", cl::init(false),
     62     cl::desc("Allow the input file to be empty. This is useful when making\n"
     63              "checks that some error message does not occur, for example."));
     64 
     65 typedef cl::list<std::string>::const_iterator prefix_iterator;
     66 
     67 //===----------------------------------------------------------------------===//
     68 // Pattern Handling Code.
     69 //===----------------------------------------------------------------------===//
     70 
     71 namespace Check {
     72   enum CheckType {
     73     CheckNone = 0,
     74     CheckPlain,
     75     CheckNext,
     76     CheckSame,
     77     CheckNot,
     78     CheckDAG,
     79     CheckLabel,
     80 
     81     /// MatchEOF - When set, this pattern only matches the end of file. This is
     82     /// used for trailing CHECK-NOTs.
     83     CheckEOF
     84   };
     85 }
     86 
     87 class Pattern {
     88   SMLoc PatternLoc;
     89 
     90   Check::CheckType CheckTy;
     91 
     92   /// FixedStr - If non-empty, this pattern is a fixed string match with the
     93   /// specified fixed string.
     94   StringRef FixedStr;
     95 
     96   /// RegEx - If non-empty, this is a regex pattern.
     97   std::string RegExStr;
     98 
     99   /// \brief Contains the number of line this pattern is in.
    100   unsigned LineNumber;
    101 
    102   /// VariableUses - Entries in this vector map to uses of a variable in the
    103   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
    104   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
    105   /// value of bar at offset 3.
    106   std::vector<std::pair<StringRef, unsigned> > VariableUses;
    107 
    108   /// VariableDefs - Maps definitions of variables to their parenthesized
    109   /// capture numbers.
    110   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
    111   std::map<StringRef, unsigned> VariableDefs;
    112 
    113 public:
    114 
    115   Pattern(Check::CheckType Ty)
    116     : CheckTy(Ty) { }
    117 
    118   /// getLoc - Return the location in source code.
    119   SMLoc getLoc() const { return PatternLoc; }
    120 
    121   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
    122   /// which prefix is being matched, SM provides the SourceMgr used for error
    123   /// reports, and LineNumber is the line number in the input file from which
    124   /// the pattern string was read.  Returns true in case of an error, false
    125   /// otherwise.
    126   bool ParsePattern(StringRef PatternStr,
    127                     StringRef Prefix,
    128                     SourceMgr &SM,
    129                     unsigned LineNumber);
    130 
    131   /// Match - Match the pattern string against the input buffer Buffer.  This
    132   /// returns the position that is matched or npos if there is no match.  If
    133   /// there is a match, the size of the matched string is returned in MatchLen.
    134   ///
    135   /// The VariableTable StringMap provides the current values of filecheck
    136   /// variables and is updated if this match defines new values.
    137   size_t Match(StringRef Buffer, size_t &MatchLen,
    138                StringMap<StringRef> &VariableTable) const;
    139 
    140   /// PrintFailureInfo - Print additional information about a failure to match
    141   /// involving this pattern.
    142   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    143                         const StringMap<StringRef> &VariableTable) const;
    144 
    145   bool hasVariable() const { return !(VariableUses.empty() &&
    146                                       VariableDefs.empty()); }
    147 
    148   Check::CheckType getCheckTy() const { return CheckTy; }
    149 
    150 private:
    151   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
    152   void AddBackrefToRegEx(unsigned BackrefNum);
    153 
    154   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
    155   /// matching this pattern at the start of \arg Buffer; a distance of zero
    156   /// should correspond to a perfect match.
    157   unsigned ComputeMatchDistance(StringRef Buffer,
    158                                const StringMap<StringRef> &VariableTable) const;
    159 
    160   /// \brief Evaluates expression and stores the result to \p Value.
    161   /// \return true on success. false when the expression has invalid syntax.
    162   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
    163 
    164   /// \brief Finds the closing sequence of a regex variable usage or
    165   /// definition. Str has to point in the beginning of the definition
    166   /// (right after the opening sequence).
    167   /// \return offset of the closing sequence within Str, or npos if it was not
    168   /// found.
    169   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
    170 };
    171 
    172 
    173 bool Pattern::ParsePattern(StringRef PatternStr,
    174                            StringRef Prefix,
    175                            SourceMgr &SM,
    176                            unsigned LineNumber) {
    177   this->LineNumber = LineNumber;
    178   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
    179 
    180   // Ignore trailing whitespace.
    181   while (!PatternStr.empty() &&
    182          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    183     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
    184 
    185   // Check that there is something on the line.
    186   if (PatternStr.empty()) {
    187     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
    188                     "found empty check string with prefix '" +
    189                     Prefix + ":'");
    190     return true;
    191   }
    192 
    193   // Check to see if this is a fixed string, or if it has regex pieces.
    194   if (PatternStr.size() < 2 ||
    195       (PatternStr.find("{{") == StringRef::npos &&
    196        PatternStr.find("[[") == StringRef::npos)) {
    197     FixedStr = PatternStr;
    198     return false;
    199   }
    200 
    201   // Paren value #0 is for the fully matched string.  Any new parenthesized
    202   // values add from there.
    203   unsigned CurParen = 1;
    204 
    205   // Otherwise, there is at least one regex piece.  Build up the regex pattern
    206   // by escaping scary characters in fixed strings, building up one big regex.
    207   while (!PatternStr.empty()) {
    208     // RegEx matches.
    209     if (PatternStr.startswith("{{")) {
    210       // This is the start of a regex match.  Scan for the }}.
    211       size_t End = PatternStr.find("}}");
    212       if (End == StringRef::npos) {
    213         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    214                         SourceMgr::DK_Error,
    215                         "found start of regex string with no end '}}'");
    216         return true;
    217       }
    218 
    219       // Enclose {{}} patterns in parens just like [[]] even though we're not
    220       // capturing the result for any purpose.  This is required in case the
    221       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
    222       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
    223       RegExStr += '(';
    224       ++CurParen;
    225 
    226       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
    227         return true;
    228       RegExStr += ')';
    229 
    230       PatternStr = PatternStr.substr(End+2);
    231       continue;
    232     }
    233 
    234     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
    235     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
    236     // second form is [[foo]] which is a reference to foo.  The variable name
    237     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
    238     // it.  This is to catch some common errors.
    239     if (PatternStr.startswith("[[")) {
    240       // Find the closing bracket pair ending the match.  End is going to be an
    241       // offset relative to the beginning of the match string.
    242       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
    243 
    244       if (End == StringRef::npos) {
    245         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    246                         SourceMgr::DK_Error,
    247                         "invalid named regex reference, no ]] found");
    248         return true;
    249       }
    250 
    251       StringRef MatchStr = PatternStr.substr(2, End);
    252       PatternStr = PatternStr.substr(End+4);
    253 
    254       // Get the regex name (e.g. "foo").
    255       size_t NameEnd = MatchStr.find(':');
    256       StringRef Name = MatchStr.substr(0, NameEnd);
    257 
    258       if (Name.empty()) {
    259         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    260                         "invalid name in named regex: empty name");
    261         return true;
    262       }
    263 
    264       // Verify that the name/expression is well formed. FileCheck currently
    265       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
    266       // is relaxed, more strict check is performed in \c EvaluateExpression.
    267       bool IsExpression = false;
    268       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    269         if (i == 0 && Name[i] == '@') {
    270           if (NameEnd != StringRef::npos) {
    271             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    272                             SourceMgr::DK_Error,
    273                             "invalid name in named regex definition");
    274             return true;
    275           }
    276           IsExpression = true;
    277           continue;
    278         }
    279         if (Name[i] != '_' && !isalnum(Name[i]) &&
    280             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
    281           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
    282                           SourceMgr::DK_Error, "invalid name in named regex");
    283           return true;
    284         }
    285       }
    286 
    287       // Name can't start with a digit.
    288       if (isdigit(static_cast<unsigned char>(Name[0]))) {
    289         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    290                         "invalid name in named regex");
    291         return true;
    292       }
    293 
    294       // Handle [[foo]].
    295       if (NameEnd == StringRef::npos) {
    296         // Handle variables that were defined earlier on the same line by
    297         // emitting a backreference.
    298         if (VariableDefs.find(Name) != VariableDefs.end()) {
    299           unsigned VarParenNum = VariableDefs[Name];
    300           if (VarParenNum < 1 || VarParenNum > 9) {
    301             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    302                             SourceMgr::DK_Error,
    303                             "Can't back-reference more than 9 variables");
    304             return true;
    305           }
    306           AddBackrefToRegEx(VarParenNum);
    307         } else {
    308           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
    309         }
    310         continue;
    311       }
    312 
    313       // Handle [[foo:.*]].
    314       VariableDefs[Name] = CurParen;
    315       RegExStr += '(';
    316       ++CurParen;
    317 
    318       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
    319         return true;
    320 
    321       RegExStr += ')';
    322     }
    323 
    324     // Handle fixed string matches.
    325     // Find the end, which is the start of the next regex.
    326     size_t FixedMatchEnd = PatternStr.find("{{");
    327     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    328     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    329     PatternStr = PatternStr.substr(FixedMatchEnd);
    330   }
    331 
    332   return false;
    333 }
    334 
    335 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
    336                               SourceMgr &SM) {
    337   Regex R(RS);
    338   std::string Error;
    339   if (!R.isValid(Error)) {
    340     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
    341                     "invalid regex: " + Error);
    342     return true;
    343   }
    344 
    345   RegExStr += RS.str();
    346   CurParen += R.getNumMatches();
    347   return false;
    348 }
    349 
    350 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
    351   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
    352   std::string Backref = std::string("\\") +
    353                         std::string(1, '0' + BackrefNum);
    354   RegExStr += Backref;
    355 }
    356 
    357 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
    358   // The only supported expression is @LINE([\+-]\d+)?
    359   if (!Expr.startswith("@LINE"))
    360     return false;
    361   Expr = Expr.substr(StringRef("@LINE").size());
    362   int Offset = 0;
    363   if (!Expr.empty()) {
    364     if (Expr[0] == '+')
    365       Expr = Expr.substr(1);
    366     else if (Expr[0] != '-')
    367       return false;
    368     if (Expr.getAsInteger(10, Offset))
    369       return false;
    370   }
    371   Value = llvm::itostr(LineNumber + Offset);
    372   return true;
    373 }
    374 
    375 /// Match - Match the pattern string against the input buffer Buffer.  This
    376 /// returns the position that is matched or npos if there is no match.  If
    377 /// there is a match, the size of the matched string is returned in MatchLen.
    378 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
    379                       StringMap<StringRef> &VariableTable) const {
    380   // If this is the EOF pattern, match it immediately.
    381   if (CheckTy == Check::CheckEOF) {
    382     MatchLen = 0;
    383     return Buffer.size();
    384   }
    385 
    386   // If this is a fixed string pattern, just match it now.
    387   if (!FixedStr.empty()) {
    388     MatchLen = FixedStr.size();
    389     return Buffer.find(FixedStr);
    390   }
    391 
    392   // Regex match.
    393 
    394   // If there are variable uses, we need to create a temporary string with the
    395   // actual value.
    396   StringRef RegExToMatch = RegExStr;
    397   std::string TmpStr;
    398   if (!VariableUses.empty()) {
    399     TmpStr = RegExStr;
    400 
    401     unsigned InsertOffset = 0;
    402     for (const auto &VariableUse : VariableUses) {
    403       std::string Value;
    404 
    405       if (VariableUse.first[0] == '@') {
    406         if (!EvaluateExpression(VariableUse.first, Value))
    407           return StringRef::npos;
    408       } else {
    409         StringMap<StringRef>::iterator it =
    410             VariableTable.find(VariableUse.first);
    411         // If the variable is undefined, return an error.
    412         if (it == VariableTable.end())
    413           return StringRef::npos;
    414 
    415         // Look up the value and escape it so that we can put it into the regex.
    416         Value += Regex::escape(it->second);
    417       }
    418 
    419       // Plop it into the regex at the adjusted offset.
    420       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
    421                     Value.begin(), Value.end());
    422       InsertOffset += Value.size();
    423     }
    424 
    425     // Match the newly constructed regex.
    426     RegExToMatch = TmpStr;
    427   }
    428 
    429 
    430   SmallVector<StringRef, 4> MatchInfo;
    431   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
    432     return StringRef::npos;
    433 
    434   // Successful regex match.
    435   assert(!MatchInfo.empty() && "Didn't get any match");
    436   StringRef FullMatch = MatchInfo[0];
    437 
    438   // If this defines any variables, remember their values.
    439   for (const auto &VariableDef : VariableDefs) {
    440     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
    441     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
    442   }
    443 
    444   MatchLen = FullMatch.size();
    445   return FullMatch.data()-Buffer.data();
    446 }
    447 
    448 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
    449                               const StringMap<StringRef> &VariableTable) const {
    450   // Just compute the number of matching characters. For regular expressions, we
    451   // just compare against the regex itself and hope for the best.
    452   //
    453   // FIXME: One easy improvement here is have the regex lib generate a single
    454   // example regular expression which matches, and use that as the example
    455   // string.
    456   StringRef ExampleString(FixedStr);
    457   if (ExampleString.empty())
    458     ExampleString = RegExStr;
    459 
    460   // Only compare up to the first line in the buffer, or the string size.
    461   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
    462   BufferPrefix = BufferPrefix.split('\n').first;
    463   return BufferPrefix.edit_distance(ExampleString);
    464 }
    465 
    466 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    467                                const StringMap<StringRef> &VariableTable) const{
    468   // If this was a regular expression using variables, print the current
    469   // variable values.
    470   if (!VariableUses.empty()) {
    471     for (const auto &VariableUse : VariableUses) {
    472       SmallString<256> Msg;
    473       raw_svector_ostream OS(Msg);
    474       StringRef Var = VariableUse.first;
    475       if (Var[0] == '@') {
    476         std::string Value;
    477         if (EvaluateExpression(Var, Value)) {
    478           OS << "with expression \"";
    479           OS.write_escaped(Var) << "\" equal to \"";
    480           OS.write_escaped(Value) << "\"";
    481         } else {
    482           OS << "uses incorrect expression \"";
    483           OS.write_escaped(Var) << "\"";
    484         }
    485       } else {
    486         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
    487 
    488         // Check for undefined variable references.
    489         if (it == VariableTable.end()) {
    490           OS << "uses undefined variable \"";
    491           OS.write_escaped(Var) << "\"";
    492         } else {
    493           OS << "with variable \"";
    494           OS.write_escaped(Var) << "\" equal to \"";
    495           OS.write_escaped(it->second) << "\"";
    496         }
    497       }
    498 
    499       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    500                       OS.str());
    501     }
    502   }
    503 
    504   // Attempt to find the closest/best fuzzy match.  Usually an error happens
    505   // because some string in the output didn't exactly match. In these cases, we
    506   // would like to show the user a best guess at what "should have" matched, to
    507   // save them having to actually check the input manually.
    508   size_t NumLinesForward = 0;
    509   size_t Best = StringRef::npos;
    510   double BestQuality = 0;
    511 
    512   // Use an arbitrary 4k limit on how far we will search.
    513   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
    514     if (Buffer[i] == '\n')
    515       ++NumLinesForward;
    516 
    517     // Patterns have leading whitespace stripped, so skip whitespace when
    518     // looking for something which looks like a pattern.
    519     if (Buffer[i] == ' ' || Buffer[i] == '\t')
    520       continue;
    521 
    522     // Compute the "quality" of this match as an arbitrary combination of the
    523     // match distance and the number of lines skipped to get to this match.
    524     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
    525     double Quality = Distance + (NumLinesForward / 100.);
    526 
    527     if (Quality < BestQuality || Best == StringRef::npos) {
    528       Best = i;
    529       BestQuality = Quality;
    530     }
    531   }
    532 
    533   // Print the "possible intended match here" line if we found something
    534   // reasonable and not equal to what we showed in the "scanning from here"
    535   // line.
    536   if (Best && Best != StringRef::npos && BestQuality < 50) {
    537       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
    538                       SourceMgr::DK_Note, "possible intended match here");
    539 
    540     // FIXME: If we wanted to be really friendly we would show why the match
    541     // failed, as it can be hard to spot simple one character differences.
    542   }
    543 }
    544 
    545 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
    546   // Offset keeps track of the current offset within the input Str
    547   size_t Offset = 0;
    548   // [...] Nesting depth
    549   size_t BracketDepth = 0;
    550 
    551   while (!Str.empty()) {
    552     if (Str.startswith("]]") && BracketDepth == 0)
    553       return Offset;
    554     if (Str[0] == '\\') {
    555       // Backslash escapes the next char within regexes, so skip them both.
    556       Str = Str.substr(2);
    557       Offset += 2;
    558     } else {
    559       switch (Str[0]) {
    560         default:
    561           break;
    562         case '[':
    563           BracketDepth++;
    564           break;
    565         case ']':
    566           if (BracketDepth == 0) {
    567             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
    568                             SourceMgr::DK_Error,
    569                             "missing closing \"]\" for regex variable");
    570             exit(1);
    571           }
    572           BracketDepth--;
    573           break;
    574       }
    575       Str = Str.substr(1);
    576       Offset++;
    577     }
    578   }
    579 
    580   return StringRef::npos;
    581 }
    582 
    583 
    584 //===----------------------------------------------------------------------===//
    585 // Check Strings.
    586 //===----------------------------------------------------------------------===//
    587 
    588 /// CheckString - This is a check that we found in the input file.
    589 struct CheckString {
    590   /// Pat - The pattern to match.
    591   Pattern Pat;
    592 
    593   /// Prefix - Which prefix name this check matched.
    594   StringRef Prefix;
    595 
    596   /// Loc - The location in the match file that the check string was specified.
    597   SMLoc Loc;
    598 
    599   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
    600   /// as opposed to a CHECK: directive.
    601   Check::CheckType CheckTy;
    602 
    603   /// DagNotStrings - These are all of the strings that are disallowed from
    604   /// occurring between this match string and the previous one (or start of
    605   /// file).
    606   std::vector<Pattern> DagNotStrings;
    607 
    608 
    609   CheckString(const Pattern &P,
    610               StringRef S,
    611               SMLoc L,
    612               Check::CheckType Ty)
    613     : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
    614 
    615   /// Check - Match check string and its "not strings" and/or "dag strings".
    616   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
    617                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
    618 
    619   /// CheckNext - Verify there is a single line in the given buffer.
    620   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
    621 
    622   /// CheckSame - Verify there is no newline in the given buffer.
    623   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
    624 
    625   /// CheckNot - Verify there's no "not strings" in the given buffer.
    626   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
    627                 const std::vector<const Pattern *> &NotStrings,
    628                 StringMap<StringRef> &VariableTable) const;
    629 
    630   /// CheckDag - Match "dag strings" and their mixed "not strings".
    631   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
    632                   std::vector<const Pattern *> &NotStrings,
    633                   StringMap<StringRef> &VariableTable) const;
    634 };
    635 
    636 /// Canonicalize whitespaces in the input file. Line endings are replaced
    637 /// with UNIX-style '\n'.
    638 ///
    639 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
    640 /// characters to a single space.
    641 static std::unique_ptr<MemoryBuffer>
    642 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
    643                       bool PreserveHorizontal) {
    644   SmallString<128> NewFile;
    645   NewFile.reserve(MB->getBufferSize());
    646 
    647   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
    648        Ptr != End; ++Ptr) {
    649     // Eliminate trailing dosish \r.
    650     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
    651       continue;
    652     }
    653 
    654     // If current char is not a horizontal whitespace or if horizontal
    655     // whitespace canonicalization is disabled, dump it to output as is.
    656     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
    657       NewFile.push_back(*Ptr);
    658       continue;
    659     }
    660 
    661     // Otherwise, add one space and advance over neighboring space.
    662     NewFile.push_back(' ');
    663     while (Ptr+1 != End &&
    664            (Ptr[1] == ' ' || Ptr[1] == '\t'))
    665       ++Ptr;
    666   }
    667 
    668   return std::unique_ptr<MemoryBuffer>(
    669       MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
    670 }
    671 
    672 static bool IsPartOfWord(char c) {
    673   return (isalnum(c) || c == '-' || c == '_');
    674 }
    675 
    676 // Get the size of the prefix extension.
    677 static size_t CheckTypeSize(Check::CheckType Ty) {
    678   switch (Ty) {
    679   case Check::CheckNone:
    680     return 0;
    681 
    682   case Check::CheckPlain:
    683     return sizeof(":") - 1;
    684 
    685   case Check::CheckNext:
    686     return sizeof("-NEXT:") - 1;
    687 
    688   case Check::CheckSame:
    689     return sizeof("-SAME:") - 1;
    690 
    691   case Check::CheckNot:
    692     return sizeof("-NOT:") - 1;
    693 
    694   case Check::CheckDAG:
    695     return sizeof("-DAG:") - 1;
    696 
    697   case Check::CheckLabel:
    698     return sizeof("-LABEL:") - 1;
    699 
    700   case Check::CheckEOF:
    701     llvm_unreachable("Should not be using EOF size");
    702   }
    703 
    704   llvm_unreachable("Bad check type");
    705 }
    706 
    707 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
    708   char NextChar = Buffer[Prefix.size()];
    709 
    710   // Verify that the : is present after the prefix.
    711   if (NextChar == ':')
    712     return Check::CheckPlain;
    713 
    714   if (NextChar != '-')
    715     return Check::CheckNone;
    716 
    717   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
    718   if (Rest.startswith("NEXT:"))
    719     return Check::CheckNext;
    720 
    721   if (Rest.startswith("SAME:"))
    722     return Check::CheckSame;
    723 
    724   if (Rest.startswith("NOT:"))
    725     return Check::CheckNot;
    726 
    727   if (Rest.startswith("DAG:"))
    728     return Check::CheckDAG;
    729 
    730   if (Rest.startswith("LABEL:"))
    731     return Check::CheckLabel;
    732 
    733   return Check::CheckNone;
    734 }
    735 
    736 // From the given position, find the next character after the word.
    737 static size_t SkipWord(StringRef Str, size_t Loc) {
    738   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
    739     ++Loc;
    740   return Loc;
    741 }
    742 
    743 // Try to find the first match in buffer for any prefix. If a valid match is
    744 // found, return that prefix and set its type and location.  If there are almost
    745 // matches (e.g. the actual prefix string is found, but is not an actual check
    746 // string), but no valid match, return an empty string and set the position to
    747 // resume searching from. If no partial matches are found, return an empty
    748 // string and the location will be StringRef::npos. If one prefix is a substring
    749 // of another, the maximal match should be found. e.g. if "A" and "AA" are
    750 // prefixes then AA-CHECK: should match the second one.
    751 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
    752                                          Check::CheckType &CheckTy,
    753                                          size_t &CheckLoc) {
    754   StringRef FirstPrefix;
    755   size_t FirstLoc = StringRef::npos;
    756   size_t SearchLoc = StringRef::npos;
    757   Check::CheckType FirstTy = Check::CheckNone;
    758 
    759   CheckTy = Check::CheckNone;
    760   CheckLoc = StringRef::npos;
    761 
    762   for (StringRef Prefix : CheckPrefixes) {
    763     size_t PrefixLoc = Buffer.find(Prefix);
    764 
    765     if (PrefixLoc == StringRef::npos)
    766       continue;
    767 
    768     // Track where we are searching for invalid prefixes that look almost right.
    769     // We need to only advance to the first partial match on the next attempt
    770     // since a partial match could be a substring of a later, valid prefix.
    771     // Need to skip to the end of the word, otherwise we could end up
    772     // matching a prefix in a substring later.
    773     if (PrefixLoc < SearchLoc)
    774       SearchLoc = SkipWord(Buffer, PrefixLoc);
    775 
    776     // We only want to find the first match to avoid skipping some.
    777     if (PrefixLoc > FirstLoc)
    778       continue;
    779     // If one matching check-prefix is a prefix of another, choose the
    780     // longer one.
    781     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
    782       continue;
    783 
    784     StringRef Rest = Buffer.drop_front(PrefixLoc);
    785     // Make sure we have actually found the prefix, and not a word containing
    786     // it. This should also prevent matching the wrong prefix when one is a
    787     // substring of another.
    788     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
    789       FirstTy = Check::CheckNone;
    790     else
    791       FirstTy = FindCheckType(Rest, Prefix);
    792 
    793     FirstLoc = PrefixLoc;
    794     FirstPrefix = Prefix;
    795   }
    796 
    797   // If the first prefix is invalid, we should continue the search after it.
    798   if (FirstTy == Check::CheckNone) {
    799     CheckLoc = SearchLoc;
    800     return "";
    801   }
    802 
    803   CheckTy = FirstTy;
    804   CheckLoc = FirstLoc;
    805   return FirstPrefix;
    806 }
    807 
    808 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
    809                                          unsigned &LineNumber,
    810                                          Check::CheckType &CheckTy,
    811                                          size_t &CheckLoc) {
    812   while (!Buffer.empty()) {
    813     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
    814     // If we found a real match, we are done.
    815     if (!Prefix.empty()) {
    816       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
    817       return Prefix;
    818     }
    819 
    820     // We didn't find any almost matches either, we are also done.
    821     if (CheckLoc == StringRef::npos)
    822       return StringRef();
    823 
    824     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
    825 
    826     // Advance to the last possible match we found and try again.
    827     Buffer = Buffer.drop_front(CheckLoc + 1);
    828   }
    829 
    830   return StringRef();
    831 }
    832 
    833 /// ReadCheckFile - Read the check file, which specifies the sequence of
    834 /// expected strings.  The strings are added to the CheckStrings vector.
    835 /// Returns true in case of an error, false otherwise.
    836 static bool ReadCheckFile(SourceMgr &SM,
    837                           std::vector<CheckString> &CheckStrings) {
    838   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
    839       MemoryBuffer::getFileOrSTDIN(CheckFilename);
    840   if (std::error_code EC = FileOrErr.getError()) {
    841     errs() << "Could not open check file '" << CheckFilename
    842            << "': " << EC.message() << '\n';
    843     return true;
    844   }
    845 
    846   // If we want to canonicalize whitespace, strip excess whitespace from the
    847   // buffer containing the CHECK lines. Remove DOS style line endings.
    848   std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
    849       std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
    850 
    851   // Find all instances of CheckPrefix followed by : in the file.
    852   StringRef Buffer = F->getBuffer();
    853 
    854   SM.AddNewSourceBuffer(std::move(F), SMLoc());
    855 
    856   std::vector<Pattern> ImplicitNegativeChecks;
    857   for (const auto &PatternString : ImplicitCheckNot) {
    858     // Create a buffer with fake command line content in order to display the
    859     // command line option responsible for the specific implicit CHECK-NOT.
    860     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
    861     std::string Suffix = "'";
    862     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
    863         Prefix + PatternString + Suffix, "command line");
    864 
    865     StringRef PatternInBuffer =
    866         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
    867     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
    868 
    869     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
    870     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
    871                                                "IMPLICIT-CHECK", SM, 0);
    872   }
    873 
    874 
    875   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
    876 
    877   // LineNumber keeps track of the line on which CheckPrefix instances are
    878   // found.
    879   unsigned LineNumber = 1;
    880 
    881   while (1) {
    882     Check::CheckType CheckTy;
    883     size_t PrefixLoc;
    884 
    885     // See if a prefix occurs in the memory buffer.
    886     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
    887                                                    LineNumber,
    888                                                    CheckTy,
    889                                                    PrefixLoc);
    890     if (UsedPrefix.empty())
    891       break;
    892 
    893     Buffer = Buffer.drop_front(PrefixLoc);
    894 
    895     // Location to use for error messages.
    896     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
    897 
    898     // PrefixLoc is to the start of the prefix. Skip to the end.
    899     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
    900 
    901     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
    902     // leading and trailing whitespace.
    903     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
    904 
    905     // Scan ahead to the end of line.
    906     size_t EOL = Buffer.find_first_of("\n\r");
    907 
    908     // Remember the location of the start of the pattern, for diagnostics.
    909     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
    910 
    911     // Parse the pattern.
    912     Pattern P(CheckTy);
    913     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
    914       return true;
    915 
    916     // Verify that CHECK-LABEL lines do not define or use variables
    917     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
    918       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
    919                       SourceMgr::DK_Error,
    920                       "found '" + UsedPrefix + "-LABEL:'"
    921                       " with variable definition or use");
    922       return true;
    923     }
    924 
    925     Buffer = Buffer.substr(EOL);
    926 
    927     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
    928     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
    929         CheckStrings.empty()) {
    930       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
    931       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
    932                       SourceMgr::DK_Error,
    933                       "found '" + UsedPrefix + "-" + Type + "' without previous '"
    934                       + UsedPrefix + ": line");
    935       return true;
    936     }
    937 
    938     // Handle CHECK-DAG/-NOT.
    939     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
    940       DagNotMatches.push_back(P);
    941       continue;
    942     }
    943 
    944     // Okay, add the string we captured to the output vector and move on.
    945     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc, CheckTy);
    946     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    947     DagNotMatches = ImplicitNegativeChecks;
    948   }
    949 
    950   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
    951   // prefix as a filler for the error message.
    952   if (!DagNotMatches.empty()) {
    953     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
    954                               SMLoc::getFromPointer(Buffer.data()),
    955                               Check::CheckEOF);
    956     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    957   }
    958 
    959   if (CheckStrings.empty()) {
    960     errs() << "error: no check strings found with prefix"
    961            << (CheckPrefixes.size() > 1 ? "es " : " ");
    962     prefix_iterator I = CheckPrefixes.begin();
    963     prefix_iterator E = CheckPrefixes.end();
    964     if (I != E) {
    965       errs() << "\'" << *I << ":'";
    966       ++I;
    967     }
    968     for (; I != E; ++I)
    969       errs() << ", \'" << *I << ":'";
    970 
    971     errs() << '\n';
    972     return true;
    973   }
    974 
    975   return false;
    976 }
    977 
    978 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
    979                              const Pattern &Pat, StringRef Buffer,
    980                              StringMap<StringRef> &VariableTable) {
    981   // Otherwise, we have an error, emit an error message.
    982   SM.PrintMessage(Loc, SourceMgr::DK_Error,
    983                   "expected string not found in input");
    984 
    985   // Print the "scanning from here" line.  If the current position is at the
    986   // end of a line, advance to the start of the next line.
    987   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
    988 
    989   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    990                   "scanning from here");
    991 
    992   // Allow the pattern to print additional information if desired.
    993   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
    994 }
    995 
    996 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
    997                              StringRef Buffer,
    998                              StringMap<StringRef> &VariableTable) {
    999   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
   1000 }
   1001 
   1002 /// CountNumNewlinesBetween - Count the number of newlines in the specified
   1003 /// range.
   1004 static unsigned CountNumNewlinesBetween(StringRef Range,
   1005                                         const char *&FirstNewLine) {
   1006   unsigned NumNewLines = 0;
   1007   while (1) {
   1008     // Scan for newline.
   1009     Range = Range.substr(Range.find_first_of("\n\r"));
   1010     if (Range.empty()) return NumNewLines;
   1011 
   1012     ++NumNewLines;
   1013 
   1014     // Handle \n\r and \r\n as a single newline.
   1015     if (Range.size() > 1 &&
   1016         (Range[1] == '\n' || Range[1] == '\r') &&
   1017         (Range[0] != Range[1]))
   1018       Range = Range.substr(1);
   1019     Range = Range.substr(1);
   1020 
   1021     if (NumNewLines == 1)
   1022       FirstNewLine = Range.begin();
   1023   }
   1024 }
   1025 
   1026 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
   1027                           bool IsLabelScanMode, size_t &MatchLen,
   1028                           StringMap<StringRef> &VariableTable) const {
   1029   size_t LastPos = 0;
   1030   std::vector<const Pattern *> NotStrings;
   1031 
   1032   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
   1033   // bounds; we have not processed variable definitions within the bounded block
   1034   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
   1035   // over the block again (including the last CHECK-LABEL) in normal mode.
   1036   if (!IsLabelScanMode) {
   1037     // Match "dag strings" (with mixed "not strings" if any).
   1038     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
   1039     if (LastPos == StringRef::npos)
   1040       return StringRef::npos;
   1041   }
   1042 
   1043   // Match itself from the last position after matching CHECK-DAG.
   1044   StringRef MatchBuffer = Buffer.substr(LastPos);
   1045   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1046   if (MatchPos == StringRef::npos) {
   1047     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
   1048     return StringRef::npos;
   1049   }
   1050 
   1051   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
   1052   // or CHECK-NOT
   1053   if (!IsLabelScanMode) {
   1054     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1055 
   1056     // If this check is a "CHECK-NEXT", verify that the previous match was on
   1057     // the previous line (i.e. that there is one newline between them).
   1058     if (CheckNext(SM, SkippedRegion))
   1059       return StringRef::npos;
   1060 
   1061     // If this check is a "CHECK-SAME", verify that the previous match was on
   1062     // the same line (i.e. that there is no newline between them).
   1063     if (CheckSame(SM, SkippedRegion))
   1064       return StringRef::npos;
   1065 
   1066     // If this match had "not strings", verify that they don't exist in the
   1067     // skipped region.
   1068     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1069       return StringRef::npos;
   1070   }
   1071 
   1072   return LastPos + MatchPos;
   1073 }
   1074 
   1075 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
   1076   if (CheckTy != Check::CheckNext)
   1077     return false;
   1078 
   1079   // Count the number of newlines between the previous match and this one.
   1080   assert(Buffer.data() !=
   1081          SM.getMemoryBuffer(
   1082            SM.FindBufferContainingLoc(
   1083              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
   1084          "CHECK-NEXT can't be the first check in a file");
   1085 
   1086   const char *FirstNewLine = nullptr;
   1087   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
   1088 
   1089   if (NumNewLines == 0) {
   1090     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
   1091                     "-NEXT: is on the same line as previous match");
   1092     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
   1093                     SourceMgr::DK_Note, "'next' match was here");
   1094     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1095                     "previous match ended here");
   1096     return true;
   1097   }
   1098 
   1099   if (NumNewLines != 1) {
   1100     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
   1101                     "-NEXT: is not on the line after the previous match");
   1102     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
   1103                     SourceMgr::DK_Note, "'next' match was here");
   1104     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1105                     "previous match ended here");
   1106     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
   1107                     "non-matching line after previous match is here");
   1108     return true;
   1109   }
   1110 
   1111   return false;
   1112 }
   1113 
   1114 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
   1115   if (CheckTy != Check::CheckSame)
   1116     return false;
   1117 
   1118   // Count the number of newlines between the previous match and this one.
   1119   assert(Buffer.data() !=
   1120              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
   1121                                     SMLoc::getFromPointer(Buffer.data())))
   1122                  ->getBufferStart() &&
   1123          "CHECK-SAME can't be the first check in a file");
   1124 
   1125   const char *FirstNewLine = nullptr;
   1126   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
   1127 
   1128   if (NumNewLines != 0) {
   1129     SM.PrintMessage(Loc, SourceMgr::DK_Error,
   1130                     Prefix +
   1131                         "-SAME: is not on the same line as the previous match");
   1132     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
   1133                     "'next' match was here");
   1134     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1135                     "previous match ended here");
   1136     return true;
   1137   }
   1138 
   1139   return false;
   1140 }
   1141 
   1142 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
   1143                            const std::vector<const Pattern *> &NotStrings,
   1144                            StringMap<StringRef> &VariableTable) const {
   1145   for (const Pattern *Pat : NotStrings) {
   1146     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
   1147 
   1148     size_t MatchLen = 0;
   1149     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
   1150 
   1151     if (Pos == StringRef::npos) continue;
   1152 
   1153     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
   1154                     SourceMgr::DK_Error,
   1155                     Prefix + "-NOT: string occurred!");
   1156     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
   1157                     Prefix + "-NOT: pattern specified here");
   1158     return true;
   1159   }
   1160 
   1161   return false;
   1162 }
   1163 
   1164 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
   1165                              std::vector<const Pattern *> &NotStrings,
   1166                              StringMap<StringRef> &VariableTable) const {
   1167   if (DagNotStrings.empty())
   1168     return 0;
   1169 
   1170   size_t LastPos = 0;
   1171   size_t StartPos = LastPos;
   1172 
   1173   for (const Pattern &Pat : DagNotStrings) {
   1174     assert((Pat.getCheckTy() == Check::CheckDAG ||
   1175             Pat.getCheckTy() == Check::CheckNot) &&
   1176            "Invalid CHECK-DAG or CHECK-NOT!");
   1177 
   1178     if (Pat.getCheckTy() == Check::CheckNot) {
   1179       NotStrings.push_back(&Pat);
   1180       continue;
   1181     }
   1182 
   1183     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
   1184 
   1185     size_t MatchLen = 0, MatchPos;
   1186 
   1187     // CHECK-DAG always matches from the start.
   1188     StringRef MatchBuffer = Buffer.substr(StartPos);
   1189     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1190     // With a group of CHECK-DAGs, a single mismatching means the match on
   1191     // that group of CHECK-DAGs fails immediately.
   1192     if (MatchPos == StringRef::npos) {
   1193       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
   1194       return StringRef::npos;
   1195     }
   1196     // Re-calc it as the offset relative to the start of the original string.
   1197     MatchPos += StartPos;
   1198 
   1199     if (!NotStrings.empty()) {
   1200       if (MatchPos < LastPos) {
   1201         // Reordered?
   1202         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
   1203                         SourceMgr::DK_Error,
   1204                         Prefix + "-DAG: found a match of CHECK-DAG"
   1205                         " reordering across a CHECK-NOT");
   1206         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
   1207                         SourceMgr::DK_Note,
   1208                         Prefix + "-DAG: the farthest match of CHECK-DAG"
   1209                         " is found here");
   1210         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
   1211                         Prefix + "-NOT: the crossed pattern specified"
   1212                         " here");
   1213         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
   1214                         Prefix + "-DAG: the reordered pattern specified"
   1215                         " here");
   1216         return StringRef::npos;
   1217       }
   1218       // All subsequent CHECK-DAGs should be matched from the farthest
   1219       // position of all precedent CHECK-DAGs (including this one.)
   1220       StartPos = LastPos;
   1221       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
   1222       // CHECK-DAG, verify that there's no 'not' strings occurred in that
   1223       // region.
   1224       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1225       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1226         return StringRef::npos;
   1227       // Clear "not strings".
   1228       NotStrings.clear();
   1229     }
   1230 
   1231     // Update the last position with CHECK-DAG matches.
   1232     LastPos = std::max(MatchPos + MatchLen, LastPos);
   1233   }
   1234 
   1235   return LastPos;
   1236 }
   1237 
   1238 // A check prefix must contain only alphanumeric, hyphens and underscores.
   1239 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
   1240   Regex Validator("^[a-zA-Z0-9_-]*$");
   1241   return Validator.match(CheckPrefix);
   1242 }
   1243 
   1244 static bool ValidateCheckPrefixes() {
   1245   StringSet<> PrefixSet;
   1246 
   1247   for (StringRef Prefix : CheckPrefixes) {
   1248     // Reject empty prefixes.
   1249     if (Prefix == "")
   1250       return false;
   1251 
   1252     if (!PrefixSet.insert(Prefix).second)
   1253       return false;
   1254 
   1255     if (!ValidateCheckPrefix(Prefix))
   1256       return false;
   1257   }
   1258 
   1259   return true;
   1260 }
   1261 
   1262 // I don't think there's a way to specify an initial value for cl::list,
   1263 // so if nothing was specified, add the default
   1264 static void AddCheckPrefixIfNeeded() {
   1265   if (CheckPrefixes.empty())
   1266     CheckPrefixes.push_back("CHECK");
   1267 }
   1268 
   1269 int main(int argc, char **argv) {
   1270   sys::PrintStackTraceOnErrorSignal();
   1271   PrettyStackTraceProgram X(argc, argv);
   1272   cl::ParseCommandLineOptions(argc, argv);
   1273 
   1274   if (!ValidateCheckPrefixes()) {
   1275     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
   1276               "start with a letter and contain only alphanumeric characters, "
   1277               "hyphens and underscores\n";
   1278     return 2;
   1279   }
   1280 
   1281   AddCheckPrefixIfNeeded();
   1282 
   1283   SourceMgr SM;
   1284 
   1285   // Read the expected strings from the check file.
   1286   std::vector<CheckString> CheckStrings;
   1287   if (ReadCheckFile(SM, CheckStrings))
   1288     return 2;
   1289 
   1290   // Open the file to check and add it to SourceMgr.
   1291   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
   1292       MemoryBuffer::getFileOrSTDIN(InputFilename);
   1293   if (std::error_code EC = FileOrErr.getError()) {
   1294     errs() << "Could not open input file '" << InputFilename
   1295            << "': " << EC.message() << '\n';
   1296     return 2;
   1297   }
   1298   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
   1299 
   1300   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
   1301     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
   1302     return 2;
   1303   }
   1304 
   1305   // Remove duplicate spaces in the input file if requested.
   1306   // Remove DOS style line endings.
   1307   std::unique_ptr<MemoryBuffer> F =
   1308       CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
   1309 
   1310   // Check that we have all of the expected strings, in order, in the input
   1311   // file.
   1312   StringRef Buffer = F->getBuffer();
   1313 
   1314   SM.AddNewSourceBuffer(std::move(F), SMLoc());
   1315 
   1316   /// VariableTable - This holds all the current filecheck variables.
   1317   StringMap<StringRef> VariableTable;
   1318 
   1319   bool hasError = false;
   1320 
   1321   unsigned i = 0, j = 0, e = CheckStrings.size();
   1322 
   1323   while (true) {
   1324     StringRef CheckRegion;
   1325     if (j == e) {
   1326       CheckRegion = Buffer;
   1327     } else {
   1328       const CheckString &CheckLabelStr = CheckStrings[j];
   1329       if (CheckLabelStr.CheckTy != Check::CheckLabel) {
   1330         ++j;
   1331         continue;
   1332       }
   1333 
   1334       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
   1335       size_t MatchLabelLen = 0;
   1336       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
   1337                                                  MatchLabelLen, VariableTable);
   1338       if (MatchLabelPos == StringRef::npos) {
   1339         hasError = true;
   1340         break;
   1341       }
   1342 
   1343       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
   1344       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
   1345       ++j;
   1346     }
   1347 
   1348     for ( ; i != j; ++i) {
   1349       const CheckString &CheckStr = CheckStrings[i];
   1350 
   1351       // Check each string within the scanned region, including a second check
   1352       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
   1353       size_t MatchLen = 0;
   1354       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
   1355                                        VariableTable);
   1356 
   1357       if (MatchPos == StringRef::npos) {
   1358         hasError = true;
   1359         i = j;
   1360         break;
   1361       }
   1362 
   1363       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
   1364     }
   1365 
   1366     if (j == e)
   1367       break;
   1368   }
   1369 
   1370   return hasError ? 1 : 0;
   1371 }
   1372