Home | History | Annotate | Download | only in FileCheck
      1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // FileCheck does a line-by-line check of a file that validates whether it
     11 // contains the expected content.  This is useful for regression tests etc.
     12 //
     13 // This program exits with an error status of 2 on error, exit status of 0 if
     14 // the file matched the expected contents, and exit status of 1 if it did not
     15 // contain the expected contents.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/ADT/SmallString.h"
     20 #include "llvm/ADT/StringExtras.h"
     21 #include "llvm/ADT/StringMap.h"
     22 #include "llvm/ADT/StringSet.h"
     23 #include "llvm/Support/CommandLine.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/PrettyStackTrace.h"
     26 #include "llvm/Support/Regex.h"
     27 #include "llvm/Support/Signals.h"
     28 #include "llvm/Support/SourceMgr.h"
     29 #include "llvm/Support/raw_ostream.h"
     30 #include <algorithm>
     31 #include <cctype>
     32 #include <map>
     33 #include <string>
     34 #include <system_error>
     35 #include <vector>
     36 using namespace llvm;
     37 
     38 static cl::opt<std::string>
     39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
     40 
     41 static cl::opt<std::string>
     42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
     43               cl::init("-"), cl::value_desc("filename"));
     44 
     45 static cl::list<std::string>
     46 CheckPrefixes("check-prefix",
     47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
     48 
     49 static cl::opt<bool>
     50 NoCanonicalizeWhiteSpace("strict-whitespace",
     51               cl::desc("Do not treat all horizontal whitespace as equivalent"));
     52 
     53 typedef cl::list<std::string>::const_iterator prefix_iterator;
     54 
     55 //===----------------------------------------------------------------------===//
     56 // Pattern Handling Code.
     57 //===----------------------------------------------------------------------===//
     58 
     59 namespace Check {
     60   enum CheckType {
     61     CheckNone = 0,
     62     CheckPlain,
     63     CheckNext,
     64     CheckNot,
     65     CheckDAG,
     66     CheckLabel,
     67 
     68     /// MatchEOF - When set, this pattern only matches the end of file. This is
     69     /// used for trailing CHECK-NOTs.
     70     CheckEOF
     71   };
     72 }
     73 
     74 class Pattern {
     75   SMLoc PatternLoc;
     76 
     77   Check::CheckType CheckTy;
     78 
     79   /// FixedStr - If non-empty, this pattern is a fixed string match with the
     80   /// specified fixed string.
     81   StringRef FixedStr;
     82 
     83   /// RegEx - If non-empty, this is a regex pattern.
     84   std::string RegExStr;
     85 
     86   /// \brief Contains the number of line this pattern is in.
     87   unsigned LineNumber;
     88 
     89   /// VariableUses - Entries in this vector map to uses of a variable in the
     90   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
     91   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
     92   /// value of bar at offset 3.
     93   std::vector<std::pair<StringRef, unsigned> > VariableUses;
     94 
     95   /// VariableDefs - Maps definitions of variables to their parenthesized
     96   /// capture numbers.
     97   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
     98   std::map<StringRef, unsigned> VariableDefs;
     99 
    100 public:
    101 
    102   Pattern(Check::CheckType Ty)
    103     : CheckTy(Ty) { }
    104 
    105   /// getLoc - Return the location in source code.
    106   SMLoc getLoc() const { return PatternLoc; }
    107 
    108   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
    109   /// which prefix is being matched, SM provides the SourceMgr used for error
    110   /// reports, and LineNumber is the line number in the input file from which
    111   /// the pattern string was read.  Returns true in case of an error, false
    112   /// otherwise.
    113   bool ParsePattern(StringRef PatternStr,
    114                     StringRef Prefix,
    115                     SourceMgr &SM,
    116                     unsigned LineNumber);
    117 
    118   /// Match - Match the pattern string against the input buffer Buffer.  This
    119   /// returns the position that is matched or npos if there is no match.  If
    120   /// there is a match, the size of the matched string is returned in MatchLen.
    121   ///
    122   /// The VariableTable StringMap provides the current values of filecheck
    123   /// variables and is updated if this match defines new values.
    124   size_t Match(StringRef Buffer, size_t &MatchLen,
    125                StringMap<StringRef> &VariableTable) const;
    126 
    127   /// PrintFailureInfo - Print additional information about a failure to match
    128   /// involving this pattern.
    129   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    130                         const StringMap<StringRef> &VariableTable) const;
    131 
    132   bool hasVariable() const { return !(VariableUses.empty() &&
    133                                       VariableDefs.empty()); }
    134 
    135   Check::CheckType getCheckTy() const { return CheckTy; }
    136 
    137 private:
    138   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
    139   void AddBackrefToRegEx(unsigned BackrefNum);
    140 
    141   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
    142   /// matching this pattern at the start of \arg Buffer; a distance of zero
    143   /// should correspond to a perfect match.
    144   unsigned ComputeMatchDistance(StringRef Buffer,
    145                                const StringMap<StringRef> &VariableTable) const;
    146 
    147   /// \brief Evaluates expression and stores the result to \p Value.
    148   /// \return true on success. false when the expression has invalid syntax.
    149   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
    150 
    151   /// \brief Finds the closing sequence of a regex variable usage or
    152   /// definition. Str has to point in the beginning of the definition
    153   /// (right after the opening sequence).
    154   /// \return offset of the closing sequence within Str, or npos if it was not
    155   /// found.
    156   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
    157 };
    158 
    159 
    160 bool Pattern::ParsePattern(StringRef PatternStr,
    161                            StringRef Prefix,
    162                            SourceMgr &SM,
    163                            unsigned LineNumber) {
    164   this->LineNumber = LineNumber;
    165   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
    166 
    167   // Ignore trailing whitespace.
    168   while (!PatternStr.empty() &&
    169          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    170     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
    171 
    172   // Check that there is something on the line.
    173   if (PatternStr.empty()) {
    174     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
    175                     "found empty check string with prefix '" +
    176                     Prefix + ":'");
    177     return true;
    178   }
    179 
    180   // Check to see if this is a fixed string, or if it has regex pieces.
    181   if (PatternStr.size() < 2 ||
    182       (PatternStr.find("{{") == StringRef::npos &&
    183        PatternStr.find("[[") == StringRef::npos)) {
    184     FixedStr = PatternStr;
    185     return false;
    186   }
    187 
    188   // Paren value #0 is for the fully matched string.  Any new parenthesized
    189   // values add from there.
    190   unsigned CurParen = 1;
    191 
    192   // Otherwise, there is at least one regex piece.  Build up the regex pattern
    193   // by escaping scary characters in fixed strings, building up one big regex.
    194   while (!PatternStr.empty()) {
    195     // RegEx matches.
    196     if (PatternStr.startswith("{{")) {
    197       // This is the start of a regex match.  Scan for the }}.
    198       size_t End = PatternStr.find("}}");
    199       if (End == StringRef::npos) {
    200         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    201                         SourceMgr::DK_Error,
    202                         "found start of regex string with no end '}}'");
    203         return true;
    204       }
    205 
    206       // Enclose {{}} patterns in parens just like [[]] even though we're not
    207       // capturing the result for any purpose.  This is required in case the
    208       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
    209       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
    210       RegExStr += '(';
    211       ++CurParen;
    212 
    213       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
    214         return true;
    215       RegExStr += ')';
    216 
    217       PatternStr = PatternStr.substr(End+2);
    218       continue;
    219     }
    220 
    221     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
    222     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
    223     // second form is [[foo]] which is a reference to foo.  The variable name
    224     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
    225     // it.  This is to catch some common errors.
    226     if (PatternStr.startswith("[[")) {
    227       // Find the closing bracket pair ending the match.  End is going to be an
    228       // offset relative to the beginning of the match string.
    229       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
    230 
    231       if (End == StringRef::npos) {
    232         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    233                         SourceMgr::DK_Error,
    234                         "invalid named regex reference, no ]] found");
    235         return true;
    236       }
    237 
    238       StringRef MatchStr = PatternStr.substr(2, End);
    239       PatternStr = PatternStr.substr(End+4);
    240 
    241       // Get the regex name (e.g. "foo").
    242       size_t NameEnd = MatchStr.find(':');
    243       StringRef Name = MatchStr.substr(0, NameEnd);
    244 
    245       if (Name.empty()) {
    246         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    247                         "invalid name in named regex: empty name");
    248         return true;
    249       }
    250 
    251       // Verify that the name/expression is well formed. FileCheck currently
    252       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
    253       // is relaxed, more strict check is performed in \c EvaluateExpression.
    254       bool IsExpression = false;
    255       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    256         if (i == 0 && Name[i] == '@') {
    257           if (NameEnd != StringRef::npos) {
    258             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    259                             SourceMgr::DK_Error,
    260                             "invalid name in named regex definition");
    261             return true;
    262           }
    263           IsExpression = true;
    264           continue;
    265         }
    266         if (Name[i] != '_' && !isalnum(Name[i]) &&
    267             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
    268           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
    269                           SourceMgr::DK_Error, "invalid name in named regex");
    270           return true;
    271         }
    272       }
    273 
    274       // Name can't start with a digit.
    275       if (isdigit(static_cast<unsigned char>(Name[0]))) {
    276         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    277                         "invalid name in named regex");
    278         return true;
    279       }
    280 
    281       // Handle [[foo]].
    282       if (NameEnd == StringRef::npos) {
    283         // Handle variables that were defined earlier on the same line by
    284         // emitting a backreference.
    285         if (VariableDefs.find(Name) != VariableDefs.end()) {
    286           unsigned VarParenNum = VariableDefs[Name];
    287           if (VarParenNum < 1 || VarParenNum > 9) {
    288             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    289                             SourceMgr::DK_Error,
    290                             "Can't back-reference more than 9 variables");
    291             return true;
    292           }
    293           AddBackrefToRegEx(VarParenNum);
    294         } else {
    295           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
    296         }
    297         continue;
    298       }
    299 
    300       // Handle [[foo:.*]].
    301       VariableDefs[Name] = CurParen;
    302       RegExStr += '(';
    303       ++CurParen;
    304 
    305       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
    306         return true;
    307 
    308       RegExStr += ')';
    309     }
    310 
    311     // Handle fixed string matches.
    312     // Find the end, which is the start of the next regex.
    313     size_t FixedMatchEnd = PatternStr.find("{{");
    314     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    315     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    316     PatternStr = PatternStr.substr(FixedMatchEnd);
    317   }
    318 
    319   return false;
    320 }
    321 
    322 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
    323                               SourceMgr &SM) {
    324   Regex R(RS);
    325   std::string Error;
    326   if (!R.isValid(Error)) {
    327     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
    328                     "invalid regex: " + Error);
    329     return true;
    330   }
    331 
    332   RegExStr += RS.str();
    333   CurParen += R.getNumMatches();
    334   return false;
    335 }
    336 
    337 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
    338   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
    339   std::string Backref = std::string("\\") +
    340                         std::string(1, '0' + BackrefNum);
    341   RegExStr += Backref;
    342 }
    343 
    344 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
    345   // The only supported expression is @LINE([\+-]\d+)?
    346   if (!Expr.startswith("@LINE"))
    347     return false;
    348   Expr = Expr.substr(StringRef("@LINE").size());
    349   int Offset = 0;
    350   if (!Expr.empty()) {
    351     if (Expr[0] == '+')
    352       Expr = Expr.substr(1);
    353     else if (Expr[0] != '-')
    354       return false;
    355     if (Expr.getAsInteger(10, Offset))
    356       return false;
    357   }
    358   Value = llvm::itostr(LineNumber + Offset);
    359   return true;
    360 }
    361 
    362 /// Match - Match the pattern string against the input buffer Buffer.  This
    363 /// returns the position that is matched or npos if there is no match.  If
    364 /// there is a match, the size of the matched string is returned in MatchLen.
    365 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
    366                       StringMap<StringRef> &VariableTable) const {
    367   // If this is the EOF pattern, match it immediately.
    368   if (CheckTy == Check::CheckEOF) {
    369     MatchLen = 0;
    370     return Buffer.size();
    371   }
    372 
    373   // If this is a fixed string pattern, just match it now.
    374   if (!FixedStr.empty()) {
    375     MatchLen = FixedStr.size();
    376     return Buffer.find(FixedStr);
    377   }
    378 
    379   // Regex match.
    380 
    381   // If there are variable uses, we need to create a temporary string with the
    382   // actual value.
    383   StringRef RegExToMatch = RegExStr;
    384   std::string TmpStr;
    385   if (!VariableUses.empty()) {
    386     TmpStr = RegExStr;
    387 
    388     unsigned InsertOffset = 0;
    389     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    390       std::string Value;
    391 
    392       if (VariableUses[i].first[0] == '@') {
    393         if (!EvaluateExpression(VariableUses[i].first, Value))
    394           return StringRef::npos;
    395       } else {
    396         StringMap<StringRef>::iterator it =
    397           VariableTable.find(VariableUses[i].first);
    398         // If the variable is undefined, return an error.
    399         if (it == VariableTable.end())
    400           return StringRef::npos;
    401 
    402         // Look up the value and escape it so that we can put it into the regex.
    403         Value += Regex::escape(it->second);
    404       }
    405 
    406       // Plop it into the regex at the adjusted offset.
    407       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
    408                     Value.begin(), Value.end());
    409       InsertOffset += Value.size();
    410     }
    411 
    412     // Match the newly constructed regex.
    413     RegExToMatch = TmpStr;
    414   }
    415 
    416 
    417   SmallVector<StringRef, 4> MatchInfo;
    418   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
    419     return StringRef::npos;
    420 
    421   // Successful regex match.
    422   assert(!MatchInfo.empty() && "Didn't get any match");
    423   StringRef FullMatch = MatchInfo[0];
    424 
    425   // If this defines any variables, remember their values.
    426   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
    427                                                      E = VariableDefs.end();
    428        I != E; ++I) {
    429     assert(I->second < MatchInfo.size() && "Internal paren error");
    430     VariableTable[I->first] = MatchInfo[I->second];
    431   }
    432 
    433   MatchLen = FullMatch.size();
    434   return FullMatch.data()-Buffer.data();
    435 }
    436 
    437 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
    438                               const StringMap<StringRef> &VariableTable) const {
    439   // Just compute the number of matching characters. For regular expressions, we
    440   // just compare against the regex itself and hope for the best.
    441   //
    442   // FIXME: One easy improvement here is have the regex lib generate a single
    443   // example regular expression which matches, and use that as the example
    444   // string.
    445   StringRef ExampleString(FixedStr);
    446   if (ExampleString.empty())
    447     ExampleString = RegExStr;
    448 
    449   // Only compare up to the first line in the buffer, or the string size.
    450   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
    451   BufferPrefix = BufferPrefix.split('\n').first;
    452   return BufferPrefix.edit_distance(ExampleString);
    453 }
    454 
    455 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    456                                const StringMap<StringRef> &VariableTable) const{
    457   // If this was a regular expression using variables, print the current
    458   // variable values.
    459   if (!VariableUses.empty()) {
    460     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    461       SmallString<256> Msg;
    462       raw_svector_ostream OS(Msg);
    463       StringRef Var = VariableUses[i].first;
    464       if (Var[0] == '@') {
    465         std::string Value;
    466         if (EvaluateExpression(Var, Value)) {
    467           OS << "with expression \"";
    468           OS.write_escaped(Var) << "\" equal to \"";
    469           OS.write_escaped(Value) << "\"";
    470         } else {
    471           OS << "uses incorrect expression \"";
    472           OS.write_escaped(Var) << "\"";
    473         }
    474       } else {
    475         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
    476 
    477         // Check for undefined variable references.
    478         if (it == VariableTable.end()) {
    479           OS << "uses undefined variable \"";
    480           OS.write_escaped(Var) << "\"";
    481         } else {
    482           OS << "with variable \"";
    483           OS.write_escaped(Var) << "\" equal to \"";
    484           OS.write_escaped(it->second) << "\"";
    485         }
    486       }
    487 
    488       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    489                       OS.str());
    490     }
    491   }
    492 
    493   // Attempt to find the closest/best fuzzy match.  Usually an error happens
    494   // because some string in the output didn't exactly match. In these cases, we
    495   // would like to show the user a best guess at what "should have" matched, to
    496   // save them having to actually check the input manually.
    497   size_t NumLinesForward = 0;
    498   size_t Best = StringRef::npos;
    499   double BestQuality = 0;
    500 
    501   // Use an arbitrary 4k limit on how far we will search.
    502   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
    503     if (Buffer[i] == '\n')
    504       ++NumLinesForward;
    505 
    506     // Patterns have leading whitespace stripped, so skip whitespace when
    507     // looking for something which looks like a pattern.
    508     if (Buffer[i] == ' ' || Buffer[i] == '\t')
    509       continue;
    510 
    511     // Compute the "quality" of this match as an arbitrary combination of the
    512     // match distance and the number of lines skipped to get to this match.
    513     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
    514     double Quality = Distance + (NumLinesForward / 100.);
    515 
    516     if (Quality < BestQuality || Best == StringRef::npos) {
    517       Best = i;
    518       BestQuality = Quality;
    519     }
    520   }
    521 
    522   // Print the "possible intended match here" line if we found something
    523   // reasonable and not equal to what we showed in the "scanning from here"
    524   // line.
    525   if (Best && Best != StringRef::npos && BestQuality < 50) {
    526       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
    527                       SourceMgr::DK_Note, "possible intended match here");
    528 
    529     // FIXME: If we wanted to be really friendly we would show why the match
    530     // failed, as it can be hard to spot simple one character differences.
    531   }
    532 }
    533 
    534 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
    535   // Offset keeps track of the current offset within the input Str
    536   size_t Offset = 0;
    537   // [...] Nesting depth
    538   size_t BracketDepth = 0;
    539 
    540   while (!Str.empty()) {
    541     if (Str.startswith("]]") && BracketDepth == 0)
    542       return Offset;
    543     if (Str[0] == '\\') {
    544       // Backslash escapes the next char within regexes, so skip them both.
    545       Str = Str.substr(2);
    546       Offset += 2;
    547     } else {
    548       switch (Str[0]) {
    549         default:
    550           break;
    551         case '[':
    552           BracketDepth++;
    553           break;
    554         case ']':
    555           if (BracketDepth == 0) {
    556             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
    557                             SourceMgr::DK_Error,
    558                             "missing closing \"]\" for regex variable");
    559             exit(1);
    560           }
    561           BracketDepth--;
    562           break;
    563       }
    564       Str = Str.substr(1);
    565       Offset++;
    566     }
    567   }
    568 
    569   return StringRef::npos;
    570 }
    571 
    572 
    573 //===----------------------------------------------------------------------===//
    574 // Check Strings.
    575 //===----------------------------------------------------------------------===//
    576 
    577 /// CheckString - This is a check that we found in the input file.
    578 struct CheckString {
    579   /// Pat - The pattern to match.
    580   Pattern Pat;
    581 
    582   /// Prefix - Which prefix name this check matched.
    583   StringRef Prefix;
    584 
    585   /// Loc - The location in the match file that the check string was specified.
    586   SMLoc Loc;
    587 
    588   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
    589   /// as opposed to a CHECK: directive.
    590   Check::CheckType CheckTy;
    591 
    592   /// DagNotStrings - These are all of the strings that are disallowed from
    593   /// occurring between this match string and the previous one (or start of
    594   /// file).
    595   std::vector<Pattern> DagNotStrings;
    596 
    597 
    598   CheckString(const Pattern &P,
    599               StringRef S,
    600               SMLoc L,
    601               Check::CheckType Ty)
    602     : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
    603 
    604   /// Check - Match check string and its "not strings" and/or "dag strings".
    605   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
    606                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
    607 
    608   /// CheckNext - Verify there is a single line in the given buffer.
    609   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
    610 
    611   /// CheckNot - Verify there's no "not strings" in the given buffer.
    612   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
    613                 const std::vector<const Pattern *> &NotStrings,
    614                 StringMap<StringRef> &VariableTable) const;
    615 
    616   /// CheckDag - Match "dag strings" and their mixed "not strings".
    617   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
    618                   std::vector<const Pattern *> &NotStrings,
    619                   StringMap<StringRef> &VariableTable) const;
    620 };
    621 
    622 /// Canonicalize whitespaces in the input file. Line endings are replaced
    623 /// with UNIX-style '\n'.
    624 ///
    625 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
    626 /// characters to a single space.
    627 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
    628                                            bool PreserveHorizontal) {
    629   SmallString<128> NewFile;
    630   NewFile.reserve(MB->getBufferSize());
    631 
    632   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
    633        Ptr != End; ++Ptr) {
    634     // Eliminate trailing dosish \r.
    635     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
    636       continue;
    637     }
    638 
    639     // If current char is not a horizontal whitespace or if horizontal
    640     // whitespace canonicalization is disabled, dump it to output as is.
    641     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
    642       NewFile.push_back(*Ptr);
    643       continue;
    644     }
    645 
    646     // Otherwise, add one space and advance over neighboring space.
    647     NewFile.push_back(' ');
    648     while (Ptr+1 != End &&
    649            (Ptr[1] == ' ' || Ptr[1] == '\t'))
    650       ++Ptr;
    651   }
    652 
    653   // Free the old buffer and return a new one.
    654   MemoryBuffer *MB2 =
    655     MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
    656 
    657   delete MB;
    658   return MB2;
    659 }
    660 
    661 static bool IsPartOfWord(char c) {
    662   return (isalnum(c) || c == '-' || c == '_');
    663 }
    664 
    665 // Get the size of the prefix extension.
    666 static size_t CheckTypeSize(Check::CheckType Ty) {
    667   switch (Ty) {
    668   case Check::CheckNone:
    669     return 0;
    670 
    671   case Check::CheckPlain:
    672     return sizeof(":") - 1;
    673 
    674   case Check::CheckNext:
    675     return sizeof("-NEXT:") - 1;
    676 
    677   case Check::CheckNot:
    678     return sizeof("-NOT:") - 1;
    679 
    680   case Check::CheckDAG:
    681     return sizeof("-DAG:") - 1;
    682 
    683   case Check::CheckLabel:
    684     return sizeof("-LABEL:") - 1;
    685 
    686   case Check::CheckEOF:
    687     llvm_unreachable("Should not be using EOF size");
    688   }
    689 
    690   llvm_unreachable("Bad check type");
    691 }
    692 
    693 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
    694   char NextChar = Buffer[Prefix.size()];
    695 
    696   // Verify that the : is present after the prefix.
    697   if (NextChar == ':')
    698     return Check::CheckPlain;
    699 
    700   if (NextChar != '-')
    701     return Check::CheckNone;
    702 
    703   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
    704   if (Rest.startswith("NEXT:"))
    705     return Check::CheckNext;
    706 
    707   if (Rest.startswith("NOT:"))
    708     return Check::CheckNot;
    709 
    710   if (Rest.startswith("DAG:"))
    711     return Check::CheckDAG;
    712 
    713   if (Rest.startswith("LABEL:"))
    714     return Check::CheckLabel;
    715 
    716   return Check::CheckNone;
    717 }
    718 
    719 // From the given position, find the next character after the word.
    720 static size_t SkipWord(StringRef Str, size_t Loc) {
    721   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
    722     ++Loc;
    723   return Loc;
    724 }
    725 
    726 // Try to find the first match in buffer for any prefix. If a valid match is
    727 // found, return that prefix and set its type and location.  If there are almost
    728 // matches (e.g. the actual prefix string is found, but is not an actual check
    729 // string), but no valid match, return an empty string and set the position to
    730 // resume searching from. If no partial matches are found, return an empty
    731 // string and the location will be StringRef::npos. If one prefix is a substring
    732 // of another, the maximal match should be found. e.g. if "A" and "AA" are
    733 // prefixes then AA-CHECK: should match the second one.
        //
        // Buffer is only read here. CheckTy and CheckLoc are outputs; both are reset
        // (to Check::CheckNone and StringRef::npos) before the search starts.
    734 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
    735                                          Check::CheckType &CheckTy,
    736                                          size_t &CheckLoc) {
          // Best candidate seen so far: the prefix whose first occurrence is
          // earliest in Buffer (longest prefix on ties), where it occurs, and the
          // check type found there.
    737   StringRef FirstPrefix;
    738   size_t FirstLoc = StringRef::npos;
          // Where the caller should resume if the best candidate is not a real check.
    739   size_t SearchLoc = StringRef::npos;
    740   Check::CheckType FirstTy = Check::CheckNone;
    741 
    742   CheckTy = Check::CheckNone;
    743   CheckLoc = StringRef::npos;
    744 
    745   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
    746        I != E; ++I) {
    747     StringRef Prefix(*I);
            // Only the first occurrence of each prefix is considered per call.
    748     size_t PrefixLoc = Buffer.find(Prefix);
    749 
    750     if (PrefixLoc == StringRef::npos)
    751       continue;
    752 
    753     // Track where we are searching for invalid prefixes that look almost right.
    754     // We need to only advance to the first partial match on the next attempt
    755     // since a partial match could be a substring of a later, valid prefix.
    756     // Need to skip to the end of the word, otherwise we could end up
    757     // matching a prefix in a substring later.
    758     if (PrefixLoc < SearchLoc)
    759       SearchLoc = SkipWord(Buffer, PrefixLoc);
    760 
    761     // We only want to find the first match to avoid skipping some.
            // (FirstLoc starts as npos, so the first prefix found always passes.)
    762     if (PrefixLoc > FirstLoc)
    763       continue;
    764     // If one matching check-prefix is a prefix of another, choose the
    765     // longer one.
    766     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
    767       continue;
    768 
    769     StringRef Rest = Buffer.drop_front(PrefixLoc);
    770     // Make sure we have actually found the prefix, and not a word containing
    771     // it. This should also prevent matching the wrong prefix when one is a
    772     // substring of another.
    773     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
    774       FirstTy = Check::CheckNone;
    775     else
    776       FirstTy = FindCheckType(Rest, Prefix);
    777 
            // Adopt this prefix as the best candidate even when its type is
            // CheckNone; the test after the loop turns that case into a near miss.
    778     FirstLoc = PrefixLoc;
    779     FirstPrefix = Prefix;
    780   }
    781 
    782   // If the first prefix is invalid, we should continue the search after it.
          // This path also covers "no prefix found at all": SearchLoc is still npos.
    783   if (FirstTy == Check::CheckNone) {
    784     CheckLoc = SearchLoc;
    785     return "";
    786   }
    787 
    788   CheckTy = FirstTy;
    789   CheckLoc = FirstLoc;
    790   return FirstPrefix;
    791 }
    792 
    793 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
    794                                          unsigned &LineNumber,
    795                                          Check::CheckType &CheckTy,
    796                                          size_t &CheckLoc) {
    797   while (!Buffer.empty()) {
    798     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
    799     // If we found a real match, we are done.
    800     if (!Prefix.empty()) {
    801       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
    802       return Prefix;
    803     }
    804 
    805     // We didn't find any almost matches either, we are also done.
    806     if (CheckLoc == StringRef::npos)
    807       return StringRef();
    808 
    809     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
    810 
    811     // Advance to the last possible match we found and try again.
    812     Buffer = Buffer.drop_front(CheckLoc + 1);
    813   }
    814 
    815   return StringRef();
    816 }
    817 
    818 /// ReadCheckFile - Read the check file, which specifies the sequence of
    819 /// expected strings.  The strings are added to the CheckStrings vector.
    820 /// Returns true in case of an error, false otherwise.
        ///
        /// CHECK-DAG and CHECK-NOT patterns do not get a CheckStrings entry of their
        /// own: they are collected in a pending list and attached to the next
        /// directive of another kind, or to a synthetic EOF entry if none follows.
        /// Every error path prints a message (via errs() or SM.PrintMessage) before
        /// returning true.
    821 static bool ReadCheckFile(SourceMgr &SM,
    822                           std::vector<CheckString> &CheckStrings) {
    823   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
    824       MemoryBuffer::getFileOrSTDIN(CheckFilename);
    825   if (std::error_code EC = FileOrErr.getError()) {
    826     errs() << "Could not open check file '" << CheckFilename
    827            << "': " << EC.message() << '\n';
    828     return true;
    829   }
    830 
    831   // If we want to canonicalize whitespace, strip excess whitespace from the
    832   // buffer containing the CHECK lines. Remove DOS style line endings.
          // The buffer is released from its unique_ptr here; F is whatever raw
          // pointer CanonicalizeInputFile hands back.
    833   MemoryBuffer *F = CanonicalizeInputFile(FileOrErr.get().release(),
    834                                           NoCanonicalizeWhiteSpace);
    835 
          // NOTE(review): presumably SM takes ownership of F from here on - confirm
          // against SourceMgr::AddNewSourceBuffer.
    836   SM.AddNewSourceBuffer(F, SMLoc());
    837 
    838   // Find all instances of CheckPrefix followed by : in the file.
    839   StringRef Buffer = F->getBuffer();
          // CHECK-DAG / CHECK-NOT patterns seen since the last stored check string.
    840   std::vector<Pattern> DagNotMatches;
    841 
    842   // LineNumber keeps track of the line on which CheckPrefix instances are
    843   // found.
    844   unsigned LineNumber = 1;
    845 
          // Each iteration consumes one check directive from Buffer.
    846   while (1) {
    847     Check::CheckType CheckTy;
    848     size_t PrefixLoc;
    849 
    850     // See if a prefix occurs in the memory buffer.
    851     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
    852                                                    LineNumber,
    853                                                    CheckTy,
    854                                                    PrefixLoc);
    855     if (UsedPrefix.empty())
    856       break;
    857 
    858     Buffer = Buffer.drop_front(PrefixLoc);
    859 
    860     // Location to use for error messages.
            // NOTE(review): Buffer already starts at the prefix at this point, so
            // the +1 makes this point one character into the prefix whenever
            // PrefixLoc != 0 - confirm whether that offset is intended.
    861     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
    862 
    863     // PrefixLoc is to the start of the prefix. Skip to the end.
    864     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
    865 
    866     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
    867     // leading and trailing whitespace.
    868     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
    869 
    870     // Scan ahead to the end of line.
    871     size_t EOL = Buffer.find_first_of("\n\r");
    872 
    873     // Remember the location of the start of the pattern, for diagnostics.
    874     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
    875 
    876     // Parse the pattern.
            // A true result from ParsePattern is treated as a parse failure.
    877     Pattern P(CheckTy);
    878     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
    879       return true;
    880 
    881     // Verify that CHECK-LABEL lines do not define or use variables
    882     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
    883       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
    884                       SourceMgr::DK_Error,
    885                       "found '" + UsedPrefix + "-LABEL:'"
    886                       " with variable definition or use");
    887       return true;
    888     }
    889 
            // Continue scanning from the end of this directive's line.
    890     Buffer = Buffer.substr(EOL);
    891 
    892     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
            // NOTE(review): the emitted text has no closing quote after the second
            // prefix's ':' - confirm before changing, tests may match this string.
    893     if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
    894       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
    895                       SourceMgr::DK_Error,
    896                       "found '" + UsedPrefix + "-NEXT:' without previous '"
    897                       + UsedPrefix + ": line");
    898       return true;
    899     }
    900 
    901     // Handle CHECK-DAG/-NOT.
            // These are only queued here; they get attached to a later check string.
    902     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
    903       DagNotMatches.push_back(P);
    904       continue;
    905     }
    906 
    907     // Okay, add the string we captured to the output vector and move on.
    908     CheckStrings.push_back(CheckString(P,
    909                                        UsedPrefix,
    910                                        PatternLoc,
    911                                        CheckTy));
            // Hand the queued CHECK-DAG/-NOT patterns to the entry just added.
            // Presumably the new entry's DagNotStrings starts out empty, which
            // leaves DagNotMatches empty for the next group - TODO confirm.
    912     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    913   }
    914 
    915   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
    916   // prefix as a filler for the error message.
    917   if (!DagNotMatches.empty()) {
    918     CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
    919                                        CheckPrefixes[0],
    920                                        SMLoc::getFromPointer(Buffer.data()),
    921                                        Check::CheckEOF));
    922     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
    923   }
    924 
          // A check file that yields no check strings at all is an error; list
          // every prefix that was searched for.
    925   if (CheckStrings.empty()) {
    926     errs() << "error: no check strings found with prefix"
    927            << (CheckPrefixes.size() > 1 ? "es " : " ");
    928     for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
    929       StringRef Prefix(CheckPrefixes[I]);
    930       errs() << '\'' << Prefix << ":'";
    931       if (I != N - 1)
    932         errs() << ", ";
    933     }
    934 
    935     errs() << '\n';
    936     return true;
    937   }
    938 
    939   return false;
    940 }
    941 
    942 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
    943                              const Pattern &Pat, StringRef Buffer,
    944                              StringMap<StringRef> &VariableTable) {
    945   // Otherwise, we have an error, emit an error message.
    946   SM.PrintMessage(Loc, SourceMgr::DK_Error,
    947                   "expected string not found in input");
    948 
    949   // Print the "scanning from here" line.  If the current position is at the
    950   // end of a line, advance to the start of the next line.
    951   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
    952 
    953   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    954                   "scanning from here");
    955 
    956   // Allow the pattern to print additional information if desired.
    957   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
    958 }
    959 
    960 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
    961                              StringRef Buffer,
    962                              StringMap<StringRef> &VariableTable) {
    963   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
    964 }
    965 
    966 /// CountNumNewlinesBetween - Count the number of newlines in the specified
    967 /// range.
    968 static unsigned CountNumNewlinesBetween(StringRef Range,
    969                                         const char *&FirstNewLine) {
    970   unsigned NumNewLines = 0;
    971   while (1) {
    972     // Scan for newline.
    973     Range = Range.substr(Range.find_first_of("\n\r"));
    974     if (Range.empty()) return NumNewLines;
    975 
    976     ++NumNewLines;
    977 
    978     // Handle \n\r and \r\n as a single newline.
    979     if (Range.size() > 1 &&
    980         (Range[1] == '\n' || Range[1] == '\r') &&
    981         (Range[0] != Range[1]))
    982       Range = Range.substr(1);
    983     Range = Range.substr(1);
    984 
    985     if (NumNewLines == 1)
    986       FirstNewLine = Range.begin();
    987   }
    988 }
    989 
    990 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
    991                           bool IsLabelScanMode, size_t &MatchLen,
    992                           StringMap<StringRef> &VariableTable) const {
    993   size_t LastPos = 0;
    994   std::vector<const Pattern *> NotStrings;
    995 
    996   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
    997   // bounds; we have not processed variable definitions within the bounded block
    998   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
    999   // over the block again (including the last CHECK-LABEL) in normal mode.
   1000   if (!IsLabelScanMode) {
   1001     // Match "dag strings" (with mixed "not strings" if any).
   1002     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
   1003     if (LastPos == StringRef::npos)
   1004       return StringRef::npos;
   1005   }
   1006 
   1007   // Match itself from the last position after matching CHECK-DAG.
   1008   StringRef MatchBuffer = Buffer.substr(LastPos);
   1009   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1010   if (MatchPos == StringRef::npos) {
   1011     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
   1012     return StringRef::npos;
   1013   }
   1014   MatchPos += LastPos;
   1015 
   1016   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
   1017   // or CHECK-NOT
   1018   if (!IsLabelScanMode) {
   1019     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1020 
   1021     // If this check is a "CHECK-NEXT", verify that the previous match was on
   1022     // the previous line (i.e. that there is one newline between them).
   1023     if (CheckNext(SM, SkippedRegion))
   1024       return StringRef::npos;
   1025 
   1026     // If this match had "not strings", verify that they don't exist in the
   1027     // skipped region.
   1028     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1029       return StringRef::npos;
   1030   }
   1031 
   1032   return MatchPos;
   1033 }
   1034 
   1035 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
   1036   if (CheckTy != Check::CheckNext)
   1037     return false;
   1038 
   1039   // Count the number of newlines between the previous match and this one.
   1040   assert(Buffer.data() !=
   1041          SM.getMemoryBuffer(
   1042            SM.FindBufferContainingLoc(
   1043              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
   1044          "CHECK-NEXT can't be the first check in a file");
   1045 
   1046   const char *FirstNewLine = nullptr;
   1047   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
   1048 
   1049   if (NumNewLines == 0) {
   1050     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
   1051                     "-NEXT: is on the same line as previous match");
   1052     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
   1053                     SourceMgr::DK_Note, "'next' match was here");
   1054     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1055                     "previous match ended here");
   1056     return true;
   1057   }
   1058 
   1059   if (NumNewLines != 1) {
   1060     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
   1061                     "-NEXT: is not on the line after the previous match");
   1062     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
   1063                     SourceMgr::DK_Note, "'next' match was here");
   1064     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1065                     "previous match ended here");
   1066     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
   1067                     "non-matching line after previous match is here");
   1068     return true;
   1069   }
   1070 
   1071   return false;
   1072 }
   1073 
   1074 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
   1075                            const std::vector<const Pattern *> &NotStrings,
   1076                            StringMap<StringRef> &VariableTable) const {
   1077   for (unsigned ChunkNo = 0, e = NotStrings.size();
   1078        ChunkNo != e; ++ChunkNo) {
   1079     const Pattern *Pat = NotStrings[ChunkNo];
   1080     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
   1081 
   1082     size_t MatchLen = 0;
   1083     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
   1084 
   1085     if (Pos == StringRef::npos) continue;
   1086 
   1087     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
   1088                     SourceMgr::DK_Error,
   1089                     Prefix + "-NOT: string occurred!");
   1090     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
   1091                     Prefix + "-NOT: pattern specified here");
   1092     return true;
   1093   }
   1094 
   1095   return false;
   1096 }
   1097 
   1098 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
   1099                              std::vector<const Pattern *> &NotStrings,
   1100                              StringMap<StringRef> &VariableTable) const {
   1101   if (DagNotStrings.empty())
   1102     return 0;
   1103 
   1104   size_t LastPos = 0;
   1105   size_t StartPos = LastPos;
   1106 
   1107   for (unsigned ChunkNo = 0, e = DagNotStrings.size();
   1108        ChunkNo != e; ++ChunkNo) {
   1109     const Pattern &Pat = DagNotStrings[ChunkNo];
   1110 
   1111     assert((Pat.getCheckTy() == Check::CheckDAG ||
   1112             Pat.getCheckTy() == Check::CheckNot) &&
   1113            "Invalid CHECK-DAG or CHECK-NOT!");
   1114 
   1115     if (Pat.getCheckTy() == Check::CheckNot) {
   1116       NotStrings.push_back(&Pat);
   1117       continue;
   1118     }
   1119 
   1120     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
   1121 
   1122     size_t MatchLen = 0, MatchPos;
   1123 
   1124     // CHECK-DAG always matches from the start.
   1125     StringRef MatchBuffer = Buffer.substr(StartPos);
   1126     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1127     // With a group of CHECK-DAGs, a single mismatching means the match on
   1128     // that group of CHECK-DAGs fails immediately.
   1129     if (MatchPos == StringRef::npos) {
   1130       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
   1131       return StringRef::npos;
   1132     }
   1133     // Re-calc it as the offset relative to the start of the original string.
   1134     MatchPos += StartPos;
   1135 
   1136     if (!NotStrings.empty()) {
   1137       if (MatchPos < LastPos) {
   1138         // Reordered?
   1139         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
   1140                         SourceMgr::DK_Error,
   1141                         Prefix + "-DAG: found a match of CHECK-DAG"
   1142                         " reordering across a CHECK-NOT");
   1143         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
   1144                         SourceMgr::DK_Note,
   1145                         Prefix + "-DAG: the farthest match of CHECK-DAG"
   1146                         " is found here");
   1147         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
   1148                         Prefix + "-NOT: the crossed pattern specified"
   1149                         " here");
   1150         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
   1151                         Prefix + "-DAG: the reordered pattern specified"
   1152                         " here");
   1153         return StringRef::npos;
   1154       }
   1155       // All subsequent CHECK-DAGs should be matched from the farthest
   1156       // position of all precedent CHECK-DAGs (including this one.)
   1157       StartPos = LastPos;
   1158       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
   1159       // CHECK-DAG, verify that there's no 'not' strings occurred in that
   1160       // region.
   1161       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1162       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1163         return StringRef::npos;
   1164       // Clear "not strings".
   1165       NotStrings.clear();
   1166     }
   1167 
   1168     // Update the last position with CHECK-DAG matches.
   1169     LastPos = std::max(MatchPos + MatchLen, LastPos);
   1170   }
   1171 
   1172   return LastPos;
   1173 }
   1174 
   1175 // A check prefix must contain only alphanumeric, hyphens and underscores.
   1176 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
   1177   Regex Validator("^[a-zA-Z0-9_-]*$");
   1178   return Validator.match(CheckPrefix);
   1179 }
   1180 
   1181 static bool ValidateCheckPrefixes() {
   1182   StringSet<> PrefixSet;
   1183 
   1184   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
   1185        I != E; ++I) {
   1186     StringRef Prefix(*I);
   1187 
   1188     if (!PrefixSet.insert(Prefix))
   1189       return false;
   1190 
   1191     if (!ValidateCheckPrefix(Prefix))
   1192       return false;
   1193   }
   1194 
   1195   return true;
   1196 }
   1197 
   1198 // I don't think there's a way to specify an initial value for cl::list,
   1199 // so if nothing was specified, add the default
   1200 static void AddCheckPrefixIfNeeded() {
   1201   if (CheckPrefixes.empty())
   1202     CheckPrefixes.push_back("CHECK");
   1203 }
   1204 
        // Program entry point. Returns 2 when a check prefix is invalid, the check
        // file cannot be read or yields no check strings, or the input file cannot
        // be opened or is empty; returns 1 when any check fails against the input;
        // returns 0 otherwise.
   1205 int main(int argc, char **argv) {
   1206   sys::PrintStackTraceOnErrorSignal();
   1207   PrettyStackTraceProgram X(argc, argv);
   1208   cl::ParseCommandLineOptions(argc, argv);
   1209 
          // Validation runs on the prefixes exactly as supplied; the default prefix
          // is only added afterwards.
          // NOTE(review): the message says prefixes must start with a letter, but
          // the validator's pattern does not enforce that - confirm which is
          // intended.
   1210   if (!ValidateCheckPrefixes()) {
   1211     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
   1212               "start with a letter and contain only alphanumeric characters, "
   1213               "hyphens and underscores\n";
   1214     return 2;
   1215   }
   1216 
   1217   AddCheckPrefixIfNeeded();
   1218 
   1219   SourceMgr SM;
   1220 
   1221   // Read the expected strings from the check file.
   1222   std::vector<CheckString> CheckStrings;
   1223   if (ReadCheckFile(SM, CheckStrings))
   1224     return 2;
   1225 
   1226   // Open the file to check and add it to SourceMgr.
   1227   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
   1228       MemoryBuffer::getFileOrSTDIN(InputFilename);
   1229   if (std::error_code EC = FileOrErr.getError()) {
   1230     errs() << "Could not open input file '" << InputFilename
   1231            << "': " << EC.message() << '\n';
   1232     return 2;
   1233   }
   1234   std::unique_ptr<MemoryBuffer> File = std::move(FileOrErr.get());
   1235 
          // An empty input is reported as an error rather than checked.
   1236   if (File->getBufferSize() == 0) {
   1237     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
   1238     return 2;
   1239   }
   1240 
   1241   // Remove duplicate spaces in the input file if requested.
   1242   // Remove DOS style line endings.
   1243   MemoryBuffer *F =
   1244     CanonicalizeInputFile(File.release(), NoCanonicalizeWhiteSpace);
   1245 
          // NOTE(review): presumably SM takes ownership of F here - confirm.
   1246   SM.AddNewSourceBuffer(F, SMLoc());
   1247 
   1248   /// VariableTable - This holds all the current filecheck variables.
   1249   StringMap<StringRef> VariableTable;
   1250 
   1251   // Check that we have all of the expected strings, in order, in the input
   1252   // file.
   1253   StringRef Buffer = F->getBuffer();
   1254 
   1255   bool hasError = false;
   1256 
          // i: next check string to verify. j: scan cursor used to find the next
          // CHECK-LABEL. e: total number of check strings.
   1257   unsigned i = 0, j = 0, e = CheckStrings.size();
   1258 
          // Each full pass moves j just past the next CHECK-LABEL (or to e), takes
          // CheckRegion as the input up to and including that label's match, and
          // then verifies CheckStrings[i, j) inside that region.
   1259   while (true) {
   1260     StringRef CheckRegion;
   1261     if (j == e) {
              // No further CHECK-LABEL: the rest of the input is the last region.
   1262       CheckRegion = Buffer;
   1263     } else {
   1264       const CheckString &CheckLabelStr = CheckStrings[j];
   1265       if (CheckLabelStr.CheckTy != Check::CheckLabel) {
   1266         ++j;
   1267         continue;
   1268       }
   1269 
   1270       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
   1271       size_t MatchLabelLen = 0;
   1272       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
   1273                                                  MatchLabelLen, VariableTable);
              // A label that cannot be found ends the whole run with an error.
   1274       if (MatchLabelPos == StringRef::npos) {
   1275         hasError = true;
   1276         break;
   1277       }
   1278 
              // The region ends right after the label match; the remaining input
              // starts there.
   1279       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
   1280       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
   1281       ++j;
   1282     }
   1283 
   1284     for ( ; i != j; ++i) {
   1285       const CheckString &CheckStr = CheckStrings[i];
   1286 
   1287       // Check each string within the scanned region, including a second check
   1288       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
   1289       size_t MatchLen = 0;
   1290       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
   1291                                        VariableTable);
   1292 
              // On failure, skip the remaining checks of this region; the outer
              // loop still goes on to any later label regions.
   1293       if (MatchPos == StringRef::npos) {
   1294         hasError = true;
   1295         i = j;
   1296         break;
   1297       }
   1298 
              // Later checks in this region only see the text after this match.
   1299       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
   1300     }
   1301 
   1302     if (j == e)
   1303       break;
   1304   }
   1305 
   1306   return hasError ? 1 : 0;
   1307 }
   1308