Home | History | Annotate | Download | only in FileCheck
      1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // FileCheck does a line-by line check of a file that validates whether it
     11 // contains the expected content.  This is useful for regression tests etc.
     12 //
     13 // This program exits with an exit status of 2 on error, exit status of 0 if
     14 // the file matched the expected contents, and exit status of 1 if it did not
     15 // contain the expected contents.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/ADT/SmallString.h"
     20 #include "llvm/ADT/StringExtras.h"
     21 #include "llvm/ADT/StringMap.h"
     22 #include "llvm/ADT/StringSet.h"
     23 #include "llvm/Support/CommandLine.h"
     24 #include "llvm/Support/InitLLVM.h"
     25 #include "llvm/Support/MemoryBuffer.h"
     26 #include "llvm/Support/Regex.h"
     27 #include "llvm/Support/SourceMgr.h"
     28 #include "llvm/Support/raw_ostream.h"
     29 #include <algorithm>
     30 #include <cctype>
     31 #include <list>
     32 #include <map>
     33 #include <string>
     34 #include <system_error>
     35 #include <vector>
     36 using namespace llvm;
     37 
     38 static cl::opt<std::string>
     39     CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
     40 
     41 static cl::opt<std::string>
     42     InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
     43                   cl::init("-"), cl::value_desc("filename"));
     44 
     45 static cl::list<std::string> CheckPrefixes(
     46     "check-prefix",
     47     cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
     48 static cl::alias CheckPrefixesAlias(
     49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
     50     cl::NotHidden,
     51     cl::desc(
     52         "Alias for -check-prefix permitting multiple comma separated values"));
     53 
     54 static cl::opt<bool> NoCanonicalizeWhiteSpace(
     55     "strict-whitespace",
     56     cl::desc("Do not treat all horizontal whitespace as equivalent"));
     57 
     58 static cl::list<std::string> ImplicitCheckNot(
     59     "implicit-check-not",
     60     cl::desc("Add an implicit negative check with this pattern to every\n"
     61              "positive check. This can be used to ensure that no instances of\n"
     62              "this pattern occur which are not matched by a positive pattern"),
     63     cl::value_desc("pattern"));
     64 
     65 static cl::list<std::string> GlobalDefines("D", cl::Prefix,
     66     cl::desc("Define a variable to be used in capture patterns."),
     67     cl::value_desc("VAR=VALUE"));
     68 
     69 static cl::opt<bool> AllowEmptyInput(
     70     "allow-empty", cl::init(false),
     71     cl::desc("Allow the input file to be empty. This is useful when making\n"
     72              "checks that some error message does not occur, for example."));
     73 
     74 static cl::opt<bool> MatchFullLines(
     75     "match-full-lines", cl::init(false),
     76     cl::desc("Require all positive matches to cover an entire input line.\n"
     77              "Allows leading and trailing whitespace if --strict-whitespace\n"
     78              "is not also passed."));
     79 
     80 static cl::opt<bool> EnableVarScope(
     81     "enable-var-scope", cl::init(false),
     82     cl::desc("Enables scope for regex variables. Variables with names that\n"
     83              "do not start with '$' will be reset at the beginning of\n"
     84              "each CHECK-LABEL block."));
     85 
     86 static cl::opt<bool> AllowDeprecatedDagOverlap(
     87     "allow-deprecated-dag-overlap", cl::init(false),
     88     cl::desc("Enable overlapping among matches in a group of consecutive\n"
     89              "CHECK-DAG directives.  This option is deprecated and is only\n"
     90              "provided for convenience as old tests are migrated to the new\n"
     91              "non-overlapping CHECK-DAG implementation.\n"));
     92 
     93 static cl::opt<bool> Verbose("v", cl::init(false),
     94                              cl::desc("Print directive pattern matches.\n"));
     95 
     96 static cl::opt<bool> VerboseVerbose(
     97     "vv", cl::init(false),
     98     cl::desc("Print information helpful in diagnosing internal FileCheck\n"
     99              "issues.  Implies -v.\n"));
    100 static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE";
    101 
    102 static cl::opt<bool> DumpInputOnFailure(
    103     "dump-input-on-failure", cl::init(std::getenv(DumpInputEnv)),
    104     cl::desc("Dump original input to stderr before failing.\n"
    105              "The value can be also controlled using\n"
    106              "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n"));
    107 
    108 typedef cl::list<std::string>::const_iterator prefix_iterator;
    109 
    110 //===----------------------------------------------------------------------===//
    111 // Pattern Handling Code.
    112 //===----------------------------------------------------------------------===//
    113 
    114 namespace Check {
    115 enum CheckType {
    116   CheckNone = 0,
    117   CheckPlain,
    118   CheckNext,
    119   CheckSame,
    120   CheckNot,
    121   CheckDAG,
    122   CheckLabel,
    123   CheckEmpty,
    124 
    125   /// Indicates the pattern only matches the end of file. This is used for
    126   /// trailing CHECK-NOTs.
    127   CheckEOF,
    128 
    129   /// Marks when parsing found a -NOT check combined with another CHECK suffix.
    130   CheckBadNot
    131 };
    132 }
    133 
    134 class Pattern {
    135   SMLoc PatternLoc;
    136 
    137   /// A fixed string to match as the pattern or empty if this pattern requires
    138   /// a regex match.
    139   StringRef FixedStr;
    140 
    141   /// A regex string to match as the pattern or empty if this pattern requires
    142   /// a fixed string to match.
    143   std::string RegExStr;
    144 
    145   /// Entries in this vector map to uses of a variable in the pattern, e.g.
    146   /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
    147   /// we'll get an entry in this vector that tells us to insert the value of
    148   /// bar at offset 3.
    149   std::vector<std::pair<StringRef, unsigned>> VariableUses;
    150 
    151   /// Maps definitions of variables to their parenthesized capture numbers.
    152   ///
    153   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
    154   /// 1.
    155   std::map<StringRef, unsigned> VariableDefs;
    156 
    157   Check::CheckType CheckTy;
    158 
    159   /// Contains the number of line this pattern is in.
    160   unsigned LineNumber;
    161 
    162 public:
    163   explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {}
    164 
    165   /// Returns the location in source code.
    166   SMLoc getLoc() const { return PatternLoc; }
    167 
    168   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
    169                     unsigned LineNumber);
    170   size_t Match(StringRef Buffer, size_t &MatchLen,
    171                StringMap<StringRef> &VariableTable) const;
    172   void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
    173                          const StringMap<StringRef> &VariableTable,
    174                          SMRange MatchRange = None) const;
    175   void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
    176                        const StringMap<StringRef> &VariableTable) const;
    177 
    178   bool hasVariable() const {
    179     return !(VariableUses.empty() && VariableDefs.empty());
    180   }
    181 
    182   Check::CheckType getCheckTy() const { return CheckTy; }
    183 
    184 private:
    185   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
    186   void AddBackrefToRegEx(unsigned BackrefNum);
    187   unsigned
    188   ComputeMatchDistance(StringRef Buffer,
    189                        const StringMap<StringRef> &VariableTable) const;
    190   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
    191   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
    192 };
    193 
    194 /// Parses the given string into the Pattern.
    195 ///
    196 /// \p Prefix provides which prefix is being matched, \p SM provides the
    197 /// SourceMgr used for error reports, and \p LineNumber is the line number in
    198 /// the input file from which the pattern string was read. Returns true in
    199 /// case of an error, false otherwise.
    200 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
    201                            SourceMgr &SM, unsigned LineNumber) {
    202   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
    203 
    204   this->LineNumber = LineNumber;
    205   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
    206 
    207   if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
    208     // Ignore trailing whitespace.
    209     while (!PatternStr.empty() &&
    210            (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    211       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
    212 
    213   // Check that there is something on the line.
    214   if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    215     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
    216                     "found empty check string with prefix '" + Prefix + ":'");
    217     return true;
    218   }
    219 
    220   if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    221     SM.PrintMessage(
    222         PatternLoc, SourceMgr::DK_Error,
    223         "found non-empty check string for empty check with prefix '" + Prefix +
    224             ":'");
    225     return true;
    226   }
    227 
    228   if (CheckTy == Check::CheckEmpty) {
    229     RegExStr = "(\n$)";
    230     return false;
    231   }
    232 
    233   // Check to see if this is a fixed string, or if it has regex pieces.
    234   if (!MatchFullLinesHere &&
    235       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
    236                                  PatternStr.find("[[") == StringRef::npos))) {
    237     FixedStr = PatternStr;
    238     return false;
    239   }
    240 
    241   if (MatchFullLinesHere) {
    242     RegExStr += '^';
    243     if (!NoCanonicalizeWhiteSpace)
    244       RegExStr += " *";
    245   }
    246 
    247   // Paren value #0 is for the fully matched string.  Any new parenthesized
    248   // values add from there.
    249   unsigned CurParen = 1;
    250 
    251   // Otherwise, there is at least one regex piece.  Build up the regex pattern
    252   // by escaping scary characters in fixed strings, building up one big regex.
    253   while (!PatternStr.empty()) {
    254     // RegEx matches.
    255     if (PatternStr.startswith("{{")) {
    256       // This is the start of a regex match.  Scan for the }}.
    257       size_t End = PatternStr.find("}}");
    258       if (End == StringRef::npos) {
    259         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    260                         SourceMgr::DK_Error,
    261                         "found start of regex string with no end '}}'");
    262         return true;
    263       }
    264 
    265       // Enclose {{}} patterns in parens just like [[]] even though we're not
    266       // capturing the result for any purpose.  This is required in case the
    267       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
    268       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
    269       RegExStr += '(';
    270       ++CurParen;
    271 
    272       if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
    273         return true;
    274       RegExStr += ')';
    275 
    276       PatternStr = PatternStr.substr(End + 2);
    277       continue;
    278     }
    279 
    280     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
    281     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
    282     // second form is [[foo]] which is a reference to foo.  The variable name
    283     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
    284     // it.  This is to catch some common errors.
    285     if (PatternStr.startswith("[[")) {
    286       // Find the closing bracket pair ending the match.  End is going to be an
    287       // offset relative to the beginning of the match string.
    288       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
    289 
    290       if (End == StringRef::npos) {
    291         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    292                         SourceMgr::DK_Error,
    293                         "invalid named regex reference, no ]] found");
    294         return true;
    295       }
    296 
    297       StringRef MatchStr = PatternStr.substr(2, End);
    298       PatternStr = PatternStr.substr(End + 4);
    299 
    300       // Get the regex name (e.g. "foo").
    301       size_t NameEnd = MatchStr.find(':');
    302       StringRef Name = MatchStr.substr(0, NameEnd);
    303 
    304       if (Name.empty()) {
    305         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    306                         "invalid name in named regex: empty name");
    307         return true;
    308       }
    309 
    310       // Verify that the name/expression is well formed. FileCheck currently
    311       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
    312       // is relaxed, more strict check is performed in \c EvaluateExpression.
    313       bool IsExpression = false;
    314       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    315         if (i == 0) {
    316           if (Name[i] == '$')  // Global vars start with '$'
    317             continue;
    318           if (Name[i] == '@') {
    319             if (NameEnd != StringRef::npos) {
    320               SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    321                               SourceMgr::DK_Error,
    322                               "invalid name in named regex definition");
    323               return true;
    324             }
    325             IsExpression = true;
    326             continue;
    327           }
    328         }
    329         if (Name[i] != '_' && !isalnum(Name[i]) &&
    330             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
    331           SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
    332                           SourceMgr::DK_Error, "invalid name in named regex");
    333           return true;
    334         }
    335       }
    336 
    337       // Name can't start with a digit.
    338       if (isdigit(static_cast<unsigned char>(Name[0]))) {
    339         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    340                         "invalid name in named regex");
    341         return true;
    342       }
    343 
    344       // Handle [[foo]].
    345       if (NameEnd == StringRef::npos) {
    346         // Handle variables that were defined earlier on the same line by
    347         // emitting a backreference.
    348         if (VariableDefs.find(Name) != VariableDefs.end()) {
    349           unsigned VarParenNum = VariableDefs[Name];
    350           if (VarParenNum < 1 || VarParenNum > 9) {
    351             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    352                             SourceMgr::DK_Error,
    353                             "Can't back-reference more than 9 variables");
    354             return true;
    355           }
    356           AddBackrefToRegEx(VarParenNum);
    357         } else {
    358           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
    359         }
    360         continue;
    361       }
    362 
    363       // Handle [[foo:.*]].
    364       VariableDefs[Name] = CurParen;
    365       RegExStr += '(';
    366       ++CurParen;
    367 
    368       if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
    369         return true;
    370 
    371       RegExStr += ')';
    372     }
    373 
    374     // Handle fixed string matches.
    375     // Find the end, which is the start of the next regex.
    376     size_t FixedMatchEnd = PatternStr.find("{{");
    377     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    378     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    379     PatternStr = PatternStr.substr(FixedMatchEnd);
    380   }
    381 
    382   if (MatchFullLinesHere) {
    383     if (!NoCanonicalizeWhiteSpace)
    384       RegExStr += " *";
    385     RegExStr += '$';
    386   }
    387 
    388   return false;
    389 }
    390 
    391 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
    392   Regex R(RS);
    393   std::string Error;
    394   if (!R.isValid(Error)) {
    395     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
    396                     "invalid regex: " + Error);
    397     return true;
    398   }
    399 
    400   RegExStr += RS.str();
    401   CurParen += R.getNumMatches();
    402   return false;
    403 }
    404 
    405 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
    406   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
    407   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
    408   RegExStr += Backref;
    409 }
    410 
    411 /// Evaluates expression and stores the result to \p Value.
    412 ///
    413 /// Returns true on success and false when the expression has invalid syntax.
    414 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
    415   // The only supported expression is @LINE([\+-]\d+)?
    416   if (!Expr.startswith("@LINE"))
    417     return false;
    418   Expr = Expr.substr(StringRef("@LINE").size());
    419   int Offset = 0;
    420   if (!Expr.empty()) {
    421     if (Expr[0] == '+')
    422       Expr = Expr.substr(1);
    423     else if (Expr[0] != '-')
    424       return false;
    425     if (Expr.getAsInteger(10, Offset))
    426       return false;
    427   }
    428   Value = llvm::itostr(LineNumber + Offset);
    429   return true;
    430 }
    431 
    432 /// Matches the pattern string against the input buffer \p Buffer
    433 ///
    434 /// This returns the position that is matched or npos if there is no match. If
    435 /// there is a match, the size of the matched string is returned in \p
    436 /// MatchLen.
    437 ///
    438 /// The \p VariableTable StringMap provides the current values of filecheck
    439 /// variables and is updated if this match defines new values.
    440 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
    441                       StringMap<StringRef> &VariableTable) const {
    442   // If this is the EOF pattern, match it immediately.
    443   if (CheckTy == Check::CheckEOF) {
    444     MatchLen = 0;
    445     return Buffer.size();
    446   }
    447 
    448   // If this is a fixed string pattern, just match it now.
    449   if (!FixedStr.empty()) {
    450     MatchLen = FixedStr.size();
    451     return Buffer.find(FixedStr);
    452   }
    453 
    454   // Regex match.
    455 
    456   // If there are variable uses, we need to create a temporary string with the
    457   // actual value.
    458   StringRef RegExToMatch = RegExStr;
    459   std::string TmpStr;
    460   if (!VariableUses.empty()) {
    461     TmpStr = RegExStr;
    462 
    463     unsigned InsertOffset = 0;
    464     for (const auto &VariableUse : VariableUses) {
    465       std::string Value;
    466 
    467       if (VariableUse.first[0] == '@') {
    468         if (!EvaluateExpression(VariableUse.first, Value))
    469           return StringRef::npos;
    470       } else {
    471         StringMap<StringRef>::iterator it =
    472             VariableTable.find(VariableUse.first);
    473         // If the variable is undefined, return an error.
    474         if (it == VariableTable.end())
    475           return StringRef::npos;
    476 
    477         // Look up the value and escape it so that we can put it into the regex.
    478         Value += Regex::escape(it->second);
    479       }
    480 
    481       // Plop it into the regex at the adjusted offset.
    482       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
    483                     Value.begin(), Value.end());
    484       InsertOffset += Value.size();
    485     }
    486 
    487     // Match the newly constructed regex.
    488     RegExToMatch = TmpStr;
    489   }
    490 
    491   SmallVector<StringRef, 4> MatchInfo;
    492   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
    493     return StringRef::npos;
    494 
    495   // Successful regex match.
    496   assert(!MatchInfo.empty() && "Didn't get any match");
    497   StringRef FullMatch = MatchInfo[0];
    498 
    499   // If this defines any variables, remember their values.
    500   for (const auto &VariableDef : VariableDefs) {
    501     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
    502     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
    503   }
    504 
    505   // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
    506   // the required preceding newline, which is consumed by the pattern in the
    507   // case of CHECK-EMPTY but not CHECK-NEXT.
    508   size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
    509   MatchLen = FullMatch.size() - MatchStartSkip;
    510   return FullMatch.data() - Buffer.data() + MatchStartSkip;
    511 }
    512 
    513 
    514 /// Computes an arbitrary estimate for the quality of matching this pattern at
    515 /// the start of \p Buffer; a distance of zero should correspond to a perfect
    516 /// match.
    517 unsigned
    518 Pattern::ComputeMatchDistance(StringRef Buffer,
    519                               const StringMap<StringRef> &VariableTable) const {
    520   // Just compute the number of matching characters. For regular expressions, we
    521   // just compare against the regex itself and hope for the best.
    522   //
    523   // FIXME: One easy improvement here is have the regex lib generate a single
    524   // example regular expression which matches, and use that as the example
    525   // string.
    526   StringRef ExampleString(FixedStr);
    527   if (ExampleString.empty())
    528     ExampleString = RegExStr;
    529 
    530   // Only compare up to the first line in the buffer, or the string size.
    531   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
    532   BufferPrefix = BufferPrefix.split('\n').first;
    533   return BufferPrefix.edit_distance(ExampleString);
    534 }
    535 
    536 void Pattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
    537                                 const StringMap<StringRef> &VariableTable,
    538                                 SMRange MatchRange) const {
    539   // If this was a regular expression using variables, print the current
    540   // variable values.
    541   if (!VariableUses.empty()) {
    542     for (const auto &VariableUse : VariableUses) {
    543       SmallString<256> Msg;
    544       raw_svector_ostream OS(Msg);
    545       StringRef Var = VariableUse.first;
    546       if (Var[0] == '@') {
    547         std::string Value;
    548         if (EvaluateExpression(Var, Value)) {
    549           OS << "with expression \"";
    550           OS.write_escaped(Var) << "\" equal to \"";
    551           OS.write_escaped(Value) << "\"";
    552         } else {
    553           OS << "uses incorrect expression \"";
    554           OS.write_escaped(Var) << "\"";
    555         }
    556       } else {
    557         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
    558 
    559         // Check for undefined variable references.
    560         if (it == VariableTable.end()) {
    561           OS << "uses undefined variable \"";
    562           OS.write_escaped(Var) << "\"";
    563         } else {
    564           OS << "with variable \"";
    565           OS.write_escaped(Var) << "\" equal to \"";
    566           OS.write_escaped(it->second) << "\"";
    567         }
    568       }
    569 
    570       if (MatchRange.isValid())
    571         SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
    572                         {MatchRange});
    573       else
    574         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
    575                         SourceMgr::DK_Note, OS.str());
    576     }
    577   }
    578 }
    579 
    580 void Pattern::PrintFuzzyMatch(
    581     const SourceMgr &SM, StringRef Buffer,
    582     const StringMap<StringRef> &VariableTable) const {
    583   // Attempt to find the closest/best fuzzy match.  Usually an error happens
    584   // because some string in the output didn't exactly match. In these cases, we
    585   // would like to show the user a best guess at what "should have" matched, to
    586   // save them having to actually check the input manually.
    587   size_t NumLinesForward = 0;
    588   size_t Best = StringRef::npos;
    589   double BestQuality = 0;
    590 
    591   // Use an arbitrary 4k limit on how far we will search.
    592   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
    593     if (Buffer[i] == '\n')
    594       ++NumLinesForward;
    595 
    596     // Patterns have leading whitespace stripped, so skip whitespace when
    597     // looking for something which looks like a pattern.
    598     if (Buffer[i] == ' ' || Buffer[i] == '\t')
    599       continue;
    600 
    601     // Compute the "quality" of this match as an arbitrary combination of the
    602     // match distance and the number of lines skipped to get to this match.
    603     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
    604     double Quality = Distance + (NumLinesForward / 100.);
    605 
    606     if (Quality < BestQuality || Best == StringRef::npos) {
    607       Best = i;
    608       BestQuality = Quality;
    609     }
    610   }
    611 
    612   // Print the "possible intended match here" line if we found something
    613   // reasonable and not equal to what we showed in the "scanning from here"
    614   // line.
    615   if (Best && Best != StringRef::npos && BestQuality < 50) {
    616     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
    617                     SourceMgr::DK_Note, "possible intended match here");
    618 
    619     // FIXME: If we wanted to be really friendly we would show why the match
    620     // failed, as it can be hard to spot simple one character differences.
    621   }
    622 }
    623 
    624 /// Finds the closing sequence of a regex variable usage or definition.
    625 ///
    626 /// \p Str has to point in the beginning of the definition (right after the
    627 /// opening sequence). Returns the offset of the closing sequence within Str,
    628 /// or npos if it was not found.
    629 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
    630   // Offset keeps track of the current offset within the input Str
    631   size_t Offset = 0;
    632   // [...] Nesting depth
    633   size_t BracketDepth = 0;
    634 
    635   while (!Str.empty()) {
    636     if (Str.startswith("]]") && BracketDepth == 0)
    637       return Offset;
    638     if (Str[0] == '\\') {
    639       // Backslash escapes the next char within regexes, so skip them both.
    640       Str = Str.substr(2);
    641       Offset += 2;
    642     } else {
    643       switch (Str[0]) {
    644       default:
    645         break;
    646       case '[':
    647         BracketDepth++;
    648         break;
    649       case ']':
    650         if (BracketDepth == 0) {
    651           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
    652                           SourceMgr::DK_Error,
    653                           "missing closing \"]\" for regex variable");
    654           exit(1);
    655         }
    656         BracketDepth--;
    657         break;
    658       }
    659       Str = Str.substr(1);
    660       Offset++;
    661     }
    662   }
    663 
    664   return StringRef::npos;
    665 }
    666 
    667 //===----------------------------------------------------------------------===//
    668 // Check Strings.
    669 //===----------------------------------------------------------------------===//
    670 
    671 /// A check that we found in the input file.
    672 struct CheckString {
    673   /// The pattern to match.
    674   Pattern Pat;
    675 
    676   /// Which prefix name this check matched.
    677   StringRef Prefix;
    678 
    679   /// The location in the match file that the check string was specified.
    680   SMLoc Loc;
    681 
    682   /// All of the strings that are disallowed from occurring between this match
    683   /// string and the previous one (or start of file).
    684   std::vector<Pattern> DagNotStrings;
    685 
    686   CheckString(const Pattern &P, StringRef S, SMLoc L)
    687       : Pat(P), Prefix(S), Loc(L) {}
    688 
    689   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
    690                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
    691 
    692   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
    693   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
    694   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
    695                 const std::vector<const Pattern *> &NotStrings,
    696                 StringMap<StringRef> &VariableTable) const;
    697   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
    698                   std::vector<const Pattern *> &NotStrings,
    699                   StringMap<StringRef> &VariableTable) const;
    700 };
    701 
    702 /// Canonicalize whitespaces in the file. Line endings are replaced with
    703 /// UNIX-style '\n'.
    704 static StringRef CanonicalizeFile(MemoryBuffer &MB,
    705                                   SmallVectorImpl<char> &OutputBuffer) {
    706   OutputBuffer.reserve(MB.getBufferSize());
    707 
    708   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
    709        Ptr != End; ++Ptr) {
    710     // Eliminate trailing dosish \r.
    711     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
    712       continue;
    713     }
    714 
    715     // If current char is not a horizontal whitespace or if horizontal
    716     // whitespace canonicalization is disabled, dump it to output as is.
    717     if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
    718       OutputBuffer.push_back(*Ptr);
    719       continue;
    720     }
    721 
    722     // Otherwise, add one space and advance over neighboring space.
    723     OutputBuffer.push_back(' ');
    724     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
    725       ++Ptr;
    726   }
    727 
    728   // Add a null byte and then return all but that byte.
    729   OutputBuffer.push_back('\0');
    730   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
    731 }
    732 
    733 static bool IsPartOfWord(char c) {
    734   return (isalnum(c) || c == '-' || c == '_');
    735 }
    736 
    737 // Get the size of the prefix extension.
    738 static size_t CheckTypeSize(Check::CheckType Ty) {
    739   switch (Ty) {
    740   case Check::CheckNone:
    741   case Check::CheckBadNot:
    742     return 0;
    743 
    744   case Check::CheckPlain:
    745     return sizeof(":") - 1;
    746 
    747   case Check::CheckNext:
    748     return sizeof("-NEXT:") - 1;
    749 
    750   case Check::CheckSame:
    751     return sizeof("-SAME:") - 1;
    752 
    753   case Check::CheckNot:
    754     return sizeof("-NOT:") - 1;
    755 
    756   case Check::CheckDAG:
    757     return sizeof("-DAG:") - 1;
    758 
    759   case Check::CheckLabel:
    760     return sizeof("-LABEL:") - 1;
    761 
    762   case Check::CheckEmpty:
    763     return sizeof("-EMPTY:") - 1;
    764 
    765   case Check::CheckEOF:
    766     llvm_unreachable("Should not be using EOF size");
    767   }
    768 
    769   llvm_unreachable("Bad check type");
    770 }
    771 
    772 // Get a description of the type.
    773 static std::string CheckTypeName(StringRef Prefix, Check::CheckType Ty) {
    774   switch (Ty) {
    775   case Check::CheckNone:
    776     return "invalid";
    777   case Check::CheckPlain:
    778     return Prefix;
    779   case Check::CheckNext:
    780     return Prefix.str() + "-NEXT";
    781   case Check::CheckSame:
    782     return Prefix.str() + "-SAME";
    783   case Check::CheckNot:
    784     return Prefix.str() + "-NOT";
    785   case Check::CheckDAG:
    786     return Prefix.str() + "-DAG";
    787   case Check::CheckLabel:
    788     return Prefix.str() + "-LABEL";
    789   case Check::CheckEmpty:
    790     return Prefix.str() + "-EMPTY";
    791   case Check::CheckEOF:
    792     return "implicit EOF";
    793   case Check::CheckBadNot:
    794     return "bad NOT";
    795   }
    796   llvm_unreachable("unknown CheckType");
    797 }
    798 
    799 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
    800   if (Buffer.size() <= Prefix.size())
    801     return Check::CheckNone;
    802 
    803   char NextChar = Buffer[Prefix.size()];
    804 
    805   // Verify that the : is present after the prefix.
    806   if (NextChar == ':')
    807     return Check::CheckPlain;
    808 
    809   if (NextChar != '-')
    810     return Check::CheckNone;
    811 
    812   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
    813   if (Rest.startswith("NEXT:"))
    814     return Check::CheckNext;
    815 
    816   if (Rest.startswith("SAME:"))
    817     return Check::CheckSame;
    818 
    819   if (Rest.startswith("NOT:"))
    820     return Check::CheckNot;
    821 
    822   if (Rest.startswith("DAG:"))
    823     return Check::CheckDAG;
    824 
    825   if (Rest.startswith("LABEL:"))
    826     return Check::CheckLabel;
    827 
    828   if (Rest.startswith("EMPTY:"))
    829     return Check::CheckEmpty;
    830 
    831   // You can't combine -NOT with another suffix.
    832   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
    833       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
    834       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
    835       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
    836     return Check::CheckBadNot;
    837 
    838   return Check::CheckNone;
    839 }
    840 
    841 // From the given position, find the next character after the word.
    842 static size_t SkipWord(StringRef Str, size_t Loc) {
    843   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
    844     ++Loc;
    845   return Loc;
    846 }
    847 
    848 /// Search the buffer for the first prefix in the prefix regular expression.
    849 ///
    850 /// This searches the buffer using the provided regular expression, however it
    851 /// enforces constraints beyond that:
    852 /// 1) The found prefix must not be a suffix of something that looks like
    853 ///    a valid prefix.
    854 /// 2) The found prefix must be followed by a valid check type suffix using \c
    855 ///    FindCheckType above.
    856 ///
    857 /// The first match of the regular expression to satisfy these two is returned,
    858 /// otherwise an empty StringRef is returned to indicate failure.
    859 ///
    860 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
    861 /// start at the beginning of the returned prefix, increment \p LineNumber for
    862 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
    863 /// check found by examining the suffix.
    864 ///
    865 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
    866 /// is unspecified.
    867 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
    868                                          unsigned &LineNumber,
    869                                          Check::CheckType &CheckTy) {
    870   SmallVector<StringRef, 2> Matches;
    871 
    872   while (!Buffer.empty()) {
    873     // Find the first (longest) match using the RE.
    874     if (!PrefixRE.match(Buffer, &Matches))
    875       // No match at all, bail.
    876       return StringRef();
    877 
    878     StringRef Prefix = Matches[0];
    879     Matches.clear();
    880 
    881     assert(Prefix.data() >= Buffer.data() &&
    882            Prefix.data() < Buffer.data() + Buffer.size() &&
    883            "Prefix doesn't start inside of buffer!");
    884     size_t Loc = Prefix.data() - Buffer.data();
    885     StringRef Skipped = Buffer.substr(0, Loc);
    886     Buffer = Buffer.drop_front(Loc);
    887     LineNumber += Skipped.count('\n');
    888 
    889     // Check that the matched prefix isn't a suffix of some other check-like
    890     // word.
    891     // FIXME: This is a very ad-hoc check. it would be better handled in some
    892     // other way. Among other things it seems hard to distinguish between
    893     // intentional and unintentional uses of this feature.
    894     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
    895       // Now extract the type.
    896       CheckTy = FindCheckType(Buffer, Prefix);
    897 
    898       // If we've found a valid check type for this prefix, we're done.
    899       if (CheckTy != Check::CheckNone)
    900         return Prefix;
    901     }
    902 
    903     // If we didn't successfully find a prefix, we need to skip this invalid
    904     // prefix and continue scanning. We directly skip the prefix that was
    905     // matched and any additional parts of that check-like word.
    906     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
    907   }
    908 
    909   // We ran out of buffer while skipping partial matches so give up.
    910   return StringRef();
    911 }
    912 
    913 /// Read the check file, which specifies the sequence of expected strings.
    914 ///
    915 /// The strings are added to the CheckStrings vector. Returns true in case of
    916 /// an error, false otherwise.
    917 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
    918                           std::vector<CheckString> &CheckStrings) {
    919   std::vector<Pattern> ImplicitNegativeChecks;
    920   for (const auto &PatternString : ImplicitCheckNot) {
    921     // Create a buffer with fake command line content in order to display the
    922     // command line option responsible for the specific implicit CHECK-NOT.
    923     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
    924     std::string Suffix = "'";
    925     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
    926         Prefix + PatternString + Suffix, "command line");
    927 
    928     StringRef PatternInBuffer =
    929         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
    930     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
    931 
    932     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
    933     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
    934                                                "IMPLICIT-CHECK", SM, 0);
    935   }
    936 
    937   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
    938 
    939   // LineNumber keeps track of the line on which CheckPrefix instances are
    940   // found.
    941   unsigned LineNumber = 1;
    942 
    943   while (1) {
    944     Check::CheckType CheckTy;
    945 
    946     // See if a prefix occurs in the memory buffer.
    947     StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
    948                                                    CheckTy);
    949     if (UsedPrefix.empty())
    950       break;
    951     assert(UsedPrefix.data() == Buffer.data() &&
    952            "Failed to move Buffer's start forward, or pointed prefix outside "
    953            "of the buffer!");
    954 
    955     // Location to use for error messages.
    956     const char *UsedPrefixStart = UsedPrefix.data();
    957 
    958     // Skip the buffer to the end.
    959     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
    960 
    961     // Complain about useful-looking but unsupported suffixes.
    962     if (CheckTy == Check::CheckBadNot) {
    963       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
    964                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
    965       return true;
    966     }
    967 
    968     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
    969     // leading whitespace.
    970     if (!(NoCanonicalizeWhiteSpace && MatchFullLines))
    971       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
    972 
    973     // Scan ahead to the end of line.
    974     size_t EOL = Buffer.find_first_of("\n\r");
    975 
    976     // Remember the location of the start of the pattern, for diagnostics.
    977     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
    978 
    979     // Parse the pattern.
    980     Pattern P(CheckTy);
    981     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
    982       return true;
    983 
    984     // Verify that CHECK-LABEL lines do not define or use variables
    985     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
    986       SM.PrintMessage(
    987           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
    988           "found '" + UsedPrefix + "-LABEL:'"
    989                                    " with variable definition or use");
    990       return true;
    991     }
    992 
    993     Buffer = Buffer.substr(EOL);
    994 
    995     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
    996     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
    997          CheckTy == Check::CheckEmpty) &&
    998         CheckStrings.empty()) {
    999       StringRef Type = CheckTy == Check::CheckNext
   1000                            ? "NEXT"
   1001                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
   1002       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
   1003                       SourceMgr::DK_Error,
   1004                       "found '" + UsedPrefix + "-" + Type +
   1005                           "' without previous '" + UsedPrefix + ": line");
   1006       return true;
   1007     }
   1008 
   1009     // Handle CHECK-DAG/-NOT.
   1010     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
   1011       DagNotMatches.push_back(P);
   1012       continue;
   1013     }
   1014 
   1015     // Okay, add the string we captured to the output vector and move on.
   1016     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
   1017     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
   1018     DagNotMatches = ImplicitNegativeChecks;
   1019   }
   1020 
   1021   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
   1022   // prefix as a filler for the error message.
   1023   if (!DagNotMatches.empty()) {
   1024     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
   1025                               SMLoc::getFromPointer(Buffer.data()));
   1026     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
   1027   }
   1028 
   1029   if (CheckStrings.empty()) {
   1030     errs() << "error: no check strings found with prefix"
   1031            << (CheckPrefixes.size() > 1 ? "es " : " ");
   1032     prefix_iterator I = CheckPrefixes.begin();
   1033     prefix_iterator E = CheckPrefixes.end();
   1034     if (I != E) {
   1035       errs() << "\'" << *I << ":'";
   1036       ++I;
   1037     }
   1038     for (; I != E; ++I)
   1039       errs() << ", \'" << *I << ":'";
   1040 
   1041     errs() << '\n';
   1042     return true;
   1043   }
   1044 
   1045   return false;
   1046 }
   1047 
   1048 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
   1049                        StringRef Prefix, SMLoc Loc, const Pattern &Pat,
   1050                        StringRef Buffer, StringMap<StringRef> &VariableTable,
   1051                        size_t MatchPos, size_t MatchLen) {
   1052   if (ExpectedMatch) {
   1053     if (!Verbose)
   1054       return;
   1055     if (!VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
   1056       return;
   1057   }
   1058   SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos);
   1059   SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen);
   1060   SMRange MatchRange(MatchStart, MatchEnd);
   1061   SM.PrintMessage(
   1062       Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error,
   1063       CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
   1064           (ExpectedMatch ? "expected" : "excluded") +
   1065           " string found in input");
   1066   SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange});
   1067   Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
   1068 }
   1069 
   1070 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
   1071                        const CheckString &CheckStr, StringRef Buffer,
   1072                        StringMap<StringRef> &VariableTable, size_t MatchPos,
   1073                        size_t MatchLen) {
   1074   PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
   1075              Buffer, VariableTable, MatchPos, MatchLen);
   1076 }
   1077 
   1078 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
   1079                          StringRef Prefix, SMLoc Loc, const Pattern &Pat,
   1080                          StringRef Buffer,
   1081                          StringMap<StringRef> &VariableTable) {
   1082   if (!ExpectedMatch && !VerboseVerbose)
   1083     return;
   1084 
   1085   // Otherwise, we have an error, emit an error message.
   1086   SM.PrintMessage(Loc,
   1087                   ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark,
   1088                   CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
   1089                       (ExpectedMatch ? "expected" : "excluded") +
   1090                       " string not found in input");
   1091 
   1092   // Print the "scanning from here" line.  If the current position is at the
   1093   // end of a line, advance to the start of the next line.
   1094   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
   1095 
   1096   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1097                   "scanning from here");
   1098 
   1099   // Allow the pattern to print additional information if desired.
   1100   Pat.PrintVariableUses(SM, Buffer, VariableTable);
   1101   if (ExpectedMatch)
   1102     Pat.PrintFuzzyMatch(SM, Buffer, VariableTable);
   1103 }
   1104 
   1105 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
   1106                          const CheckString &CheckStr, StringRef Buffer,
   1107                          StringMap<StringRef> &VariableTable) {
   1108   PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
   1109                Buffer, VariableTable);
   1110 }
   1111 
   1112 /// Count the number of newlines in the specified range.
   1113 static unsigned CountNumNewlinesBetween(StringRef Range,
   1114                                         const char *&FirstNewLine) {
   1115   unsigned NumNewLines = 0;
   1116   while (1) {
   1117     // Scan for newline.
   1118     Range = Range.substr(Range.find_first_of("\n\r"));
   1119     if (Range.empty())
   1120       return NumNewLines;
   1121 
   1122     ++NumNewLines;
   1123 
   1124     // Handle \n\r and \r\n as a single newline.
   1125     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
   1126         (Range[0] != Range[1]))
   1127       Range = Range.substr(1);
   1128     Range = Range.substr(1);
   1129 
   1130     if (NumNewLines == 1)
   1131       FirstNewLine = Range.begin();
   1132   }
   1133 }
   1134 
   1135 /// Match check string and its "not strings" and/or "dag strings".
   1136 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
   1137                           bool IsLabelScanMode, size_t &MatchLen,
   1138                           StringMap<StringRef> &VariableTable) const {
   1139   size_t LastPos = 0;
   1140   std::vector<const Pattern *> NotStrings;
   1141 
   1142   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
   1143   // bounds; we have not processed variable definitions within the bounded block
   1144   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
   1145   // over the block again (including the last CHECK-LABEL) in normal mode.
   1146   if (!IsLabelScanMode) {
   1147     // Match "dag strings" (with mixed "not strings" if any).
   1148     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
   1149     if (LastPos == StringRef::npos)
   1150       return StringRef::npos;
   1151   }
   1152 
   1153   // Match itself from the last position after matching CHECK-DAG.
   1154   StringRef MatchBuffer = Buffer.substr(LastPos);
   1155   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1156   if (MatchPos == StringRef::npos) {
   1157     PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable);
   1158     return StringRef::npos;
   1159   }
   1160   PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen);
   1161 
   1162   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
   1163   // or CHECK-NOT
   1164   if (!IsLabelScanMode) {
   1165     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
   1166 
   1167     // If this check is a "CHECK-NEXT", verify that the previous match was on
   1168     // the previous line (i.e. that there is one newline between them).
   1169     if (CheckNext(SM, SkippedRegion))
   1170       return StringRef::npos;
   1171 
   1172     // If this check is a "CHECK-SAME", verify that the previous match was on
   1173     // the same line (i.e. that there is no newline between them).
   1174     if (CheckSame(SM, SkippedRegion))
   1175       return StringRef::npos;
   1176 
   1177     // If this match had "not strings", verify that they don't exist in the
   1178     // skipped region.
   1179     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1180       return StringRef::npos;
   1181   }
   1182 
   1183   return LastPos + MatchPos;
   1184 }
   1185 
   1186 /// Verify there is a single line in the given buffer.
   1187 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
   1188   if (Pat.getCheckTy() != Check::CheckNext &&
   1189       Pat.getCheckTy() != Check::CheckEmpty)
   1190     return false;
   1191 
   1192   Twine CheckName =
   1193       Prefix +
   1194       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
   1195 
   1196   // Count the number of newlines between the previous match and this one.
   1197   assert(Buffer.data() !=
   1198              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
   1199                                     SMLoc::getFromPointer(Buffer.data())))
   1200                  ->getBufferStart() &&
   1201          "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
   1202 
   1203   const char *FirstNewLine = nullptr;
   1204   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
   1205 
   1206   if (NumNewLines == 0) {
   1207     SM.PrintMessage(Loc, SourceMgr::DK_Error,
   1208                     CheckName + ": is on the same line as previous match");
   1209     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
   1210                     "'next' match was here");
   1211     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1212                     "previous match ended here");
   1213     return true;
   1214   }
   1215 
   1216   if (NumNewLines != 1) {
   1217     SM.PrintMessage(Loc, SourceMgr::DK_Error,
   1218                     CheckName +
   1219                         ": is not on the line after the previous match");
   1220     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
   1221                     "'next' match was here");
   1222     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1223                     "previous match ended here");
   1224     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
   1225                     "non-matching line after previous match is here");
   1226     return true;
   1227   }
   1228 
   1229   return false;
   1230 }
   1231 
   1232 /// Verify there is no newline in the given buffer.
   1233 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
   1234   if (Pat.getCheckTy() != Check::CheckSame)
   1235     return false;
   1236 
   1237   // Count the number of newlines between the previous match and this one.
   1238   assert(Buffer.data() !=
   1239              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
   1240                                     SMLoc::getFromPointer(Buffer.data())))
   1241                  ->getBufferStart() &&
   1242          "CHECK-SAME can't be the first check in a file");
   1243 
   1244   const char *FirstNewLine = nullptr;
   1245   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
   1246 
   1247   if (NumNewLines != 0) {
   1248     SM.PrintMessage(Loc, SourceMgr::DK_Error,
   1249                     Prefix +
   1250                         "-SAME: is not on the same line as the previous match");
   1251     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
   1252                     "'next' match was here");
   1253     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
   1254                     "previous match ended here");
   1255     return true;
   1256   }
   1257 
   1258   return false;
   1259 }
   1260 
   1261 /// Verify there's no "not strings" in the given buffer.
   1262 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
   1263                            const std::vector<const Pattern *> &NotStrings,
   1264                            StringMap<StringRef> &VariableTable) const {
   1265   for (const Pattern *Pat : NotStrings) {
   1266     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
   1267 
   1268     size_t MatchLen = 0;
   1269     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
   1270 
   1271     if (Pos == StringRef::npos) {
   1272       PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer,
   1273                    VariableTable);
   1274       continue;
   1275     }
   1276 
   1277     PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable,
   1278                Pos, MatchLen);
   1279 
   1280     return true;
   1281   }
   1282 
   1283   return false;
   1284 }
   1285 
   1286 /// Match "dag strings" and their mixed "not strings".
   1287 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
   1288                              std::vector<const Pattern *> &NotStrings,
   1289                              StringMap<StringRef> &VariableTable) const {
   1290   if (DagNotStrings.empty())
   1291     return 0;
   1292 
   1293   // The start of the search range.
   1294   size_t StartPos = 0;
   1295 
   1296   struct MatchRange {
   1297     size_t Pos;
   1298     size_t End;
   1299   };
   1300   // A sorted list of ranges for non-overlapping CHECK-DAG matches.  Match
   1301   // ranges are erased from this list once they are no longer in the search
   1302   // range.
   1303   std::list<MatchRange> MatchRanges;
   1304 
   1305   // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
   1306   // group, so we don't use a range-based for loop here.
   1307   for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
   1308        PatItr != PatEnd; ++PatItr) {
   1309     const Pattern &Pat = *PatItr;
   1310     assert((Pat.getCheckTy() == Check::CheckDAG ||
   1311             Pat.getCheckTy() == Check::CheckNot) &&
   1312            "Invalid CHECK-DAG or CHECK-NOT!");
   1313 
   1314     if (Pat.getCheckTy() == Check::CheckNot) {
   1315       NotStrings.push_back(&Pat);
   1316       continue;
   1317     }
   1318 
   1319     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
   1320 
   1321     // CHECK-DAG always matches from the start.
   1322     size_t MatchLen = 0, MatchPos = StartPos;
   1323 
   1324     // Search for a match that doesn't overlap a previous match in this
   1325     // CHECK-DAG group.
   1326     for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
   1327       StringRef MatchBuffer = Buffer.substr(MatchPos);
   1328       size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   1329       // With a group of CHECK-DAGs, a single mismatching means the match on
   1330       // that group of CHECK-DAGs fails immediately.
   1331       if (MatchPosBuf == StringRef::npos) {
   1332         PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer,
   1333                      VariableTable);
   1334         return StringRef::npos;
   1335       }
   1336       // Re-calc it as the offset relative to the start of the original string.
   1337       MatchPos += MatchPosBuf;
   1338       if (VerboseVerbose)
   1339         PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
   1340                    MatchPos, MatchLen);
   1341       MatchRange M{MatchPos, MatchPos + MatchLen};
   1342       if (AllowDeprecatedDagOverlap) {
   1343         // We don't need to track all matches in this mode, so we just maintain
   1344         // one match range that encompasses the current CHECK-DAG group's
   1345         // matches.
   1346         if (MatchRanges.empty())
   1347           MatchRanges.insert(MatchRanges.end(), M);
   1348         else {
   1349           auto Block = MatchRanges.begin();
   1350           Block->Pos = std::min(Block->Pos, M.Pos);
   1351           Block->End = std::max(Block->End, M.End);
   1352         }
   1353         break;
   1354       }
   1355       // Iterate previous matches until overlapping match or insertion point.
   1356       bool Overlap = false;
   1357       for (; MI != ME; ++MI) {
   1358         if (M.Pos < MI->End) {
   1359           // !Overlap => New match has no overlap and is before this old match.
   1360           // Overlap => New match overlaps this old match.
   1361           Overlap = MI->Pos < M.End;
   1362           break;
   1363         }
   1364       }
   1365       if (!Overlap) {
   1366         // Insert non-overlapping match into list.
   1367         MatchRanges.insert(MI, M);
   1368         break;
   1369       }
   1370       if (VerboseVerbose) {
   1371         SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
   1372         SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
   1373         SMRange OldRange(OldStart, OldEnd);
   1374         SM.PrintMessage(OldStart, SourceMgr::DK_Note,
   1375                         "match discarded, overlaps earlier DAG match here",
   1376                         {OldRange});
   1377       }
   1378       MatchPos = MI->End;
   1379     }
   1380     if (!VerboseVerbose)
   1381       PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
   1382                  MatchPos, MatchLen);
   1383 
   1384     // Handle the end of a CHECK-DAG group.
   1385     if (std::next(PatItr) == PatEnd ||
   1386         std::next(PatItr)->getCheckTy() == Check::CheckNot) {
   1387       if (!NotStrings.empty()) {
   1388         // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
   1389         // CHECK-DAG, verify that there are no 'not' strings occurred in that
   1390         // region.
   1391         StringRef SkippedRegion =
   1392             Buffer.slice(StartPos, MatchRanges.begin()->Pos);
   1393         if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
   1394           return StringRef::npos;
   1395         // Clear "not strings".
   1396         NotStrings.clear();
   1397       }
   1398       // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
   1399       // end of this CHECK-DAG group's match range.
   1400       StartPos = MatchRanges.rbegin()->End;
   1401       // Don't waste time checking for (impossible) overlaps before that.
   1402       MatchRanges.clear();
   1403     }
   1404   }
   1405 
   1406   return StartPos;
   1407 }
   1408 
   1409 // A check prefix must contain only alphanumeric, hyphens and underscores.
   1410 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
   1411   Regex Validator("^[a-zA-Z0-9_-]*$");
   1412   return Validator.match(CheckPrefix);
   1413 }
   1414 
   1415 static bool ValidateCheckPrefixes() {
   1416   StringSet<> PrefixSet;
   1417 
   1418   for (StringRef Prefix : CheckPrefixes) {
   1419     // Reject empty prefixes.
   1420     if (Prefix == "")
   1421       return false;
   1422 
   1423     if (!PrefixSet.insert(Prefix).second)
   1424       return false;
   1425 
   1426     if (!ValidateCheckPrefix(Prefix))
   1427       return false;
   1428   }
   1429 
   1430   return true;
   1431 }
   1432 
   1433 // Combines the check prefixes into a single regex so that we can efficiently
   1434 // scan for any of the set.
   1435 //
   1436 // The semantics are that the longest-match wins which matches our regex
   1437 // library.
   1438 static Regex buildCheckPrefixRegex() {
   1439   // I don't think there's a way to specify an initial value for cl::list,
   1440   // so if nothing was specified, add the default
   1441   if (CheckPrefixes.empty())
   1442     CheckPrefixes.push_back("CHECK");
   1443 
   1444   // We already validated the contents of CheckPrefixes so just concatenate
   1445   // them as alternatives.
   1446   SmallString<32> PrefixRegexStr;
   1447   for (StringRef Prefix : CheckPrefixes) {
   1448     if (Prefix != CheckPrefixes.front())
   1449       PrefixRegexStr.push_back('|');
   1450 
   1451     PrefixRegexStr.append(Prefix);
   1452   }
   1453 
   1454   return Regex(PrefixRegexStr);
   1455 }
   1456 
   1457 static void DumpCommandLine(int argc, char **argv) {
   1458   errs() << "FileCheck command line: ";
   1459   for (int I = 0; I < argc; I++)
   1460     errs() << " " << argv[I];
   1461   errs() << "\n";
   1462 }
   1463 
   1464 // Remove local variables from \p VariableTable. Global variables
   1465 // (start with '$') are preserved.
   1466 static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
   1467   SmallVector<StringRef, 16> LocalVars;
   1468   for (const auto &Var : VariableTable)
   1469     if (Var.first()[0] != '$')
   1470       LocalVars.push_back(Var.first());
   1471 
   1472   for (const auto &Var : LocalVars)
   1473     VariableTable.erase(Var);
   1474 }
   1475 
   1476 /// Check the input to FileCheck provided in the \p Buffer against the \p
   1477 /// CheckStrings read from the check file.
   1478 ///
   1479 /// Returns false if the input fails to satisfy the checks.
   1480 bool CheckInput(SourceMgr &SM, StringRef Buffer,
   1481                 ArrayRef<CheckString> CheckStrings) {
   1482   bool ChecksFailed = false;
   1483 
   1484   /// VariableTable - This holds all the current filecheck variables.
   1485   StringMap<StringRef> VariableTable;
   1486 
   1487   for (const auto& Def : GlobalDefines)
   1488     VariableTable.insert(StringRef(Def).split('='));
   1489 
   1490   unsigned i = 0, j = 0, e = CheckStrings.size();
   1491   while (true) {
   1492     StringRef CheckRegion;
   1493     if (j == e) {
   1494       CheckRegion = Buffer;
   1495     } else {
   1496       const CheckString &CheckLabelStr = CheckStrings[j];
   1497       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
   1498         ++j;
   1499         continue;
   1500       }
   1501 
   1502       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
   1503       size_t MatchLabelLen = 0;
   1504       size_t MatchLabelPos =
   1505           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable);
   1506       if (MatchLabelPos == StringRef::npos)
   1507         // Immediately bail of CHECK-LABEL fails, nothing else we can do.
   1508         return false;
   1509 
   1510       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
   1511       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
   1512       ++j;
   1513     }
   1514 
   1515     if (EnableVarScope)
   1516       ClearLocalVars(VariableTable);
   1517 
   1518     for (; i != j; ++i) {
   1519       const CheckString &CheckStr = CheckStrings[i];
   1520 
   1521       // Check each string within the scanned region, including a second check
   1522       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
   1523       size_t MatchLen = 0;
   1524       size_t MatchPos =
   1525           CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable);
   1526 
   1527       if (MatchPos == StringRef::npos) {
   1528         ChecksFailed = true;
   1529         i = j;
   1530         break;
   1531       }
   1532 
   1533       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
   1534     }
   1535 
   1536     if (j == e)
   1537       break;
   1538   }
   1539 
   1540   // Success if no checks failed.
   1541   return !ChecksFailed;
   1542 }
   1543 
   1544 int main(int argc, char **argv) {
   1545   InitLLVM X(argc, argv);
   1546   cl::ParseCommandLineOptions(argc, argv);
   1547 
   1548   if (!ValidateCheckPrefixes()) {
   1549     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
   1550               "start with a letter and contain only alphanumeric characters, "
   1551               "hyphens and underscores\n";
   1552     return 2;
   1553   }
   1554 
   1555   Regex PrefixRE = buildCheckPrefixRegex();
   1556   std::string REError;
   1557   if (!PrefixRE.isValid(REError)) {
   1558     errs() << "Unable to combine check-prefix strings into a prefix regular "
   1559               "expression! This is likely a bug in FileCheck's verification of "
   1560               "the check-prefix strings. Regular expression parsing failed "
   1561               "with the following error: "
   1562            << REError << "\n";
   1563     return 2;
   1564   }
   1565 
   1566   if (VerboseVerbose)
   1567     Verbose = true;
   1568 
   1569   SourceMgr SM;
   1570 
   1571   // Read the expected strings from the check file.
   1572   ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
   1573       MemoryBuffer::getFileOrSTDIN(CheckFilename);
   1574   if (std::error_code EC = CheckFileOrErr.getError()) {
   1575     errs() << "Could not open check file '" << CheckFilename
   1576            << "': " << EC.message() << '\n';
   1577     return 2;
   1578   }
   1579   MemoryBuffer &CheckFile = *CheckFileOrErr.get();
   1580 
   1581   SmallString<4096> CheckFileBuffer;
   1582   StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer);
   1583 
   1584   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
   1585                             CheckFileText, CheckFile.getBufferIdentifier()),
   1586                         SMLoc());
   1587 
   1588   std::vector<CheckString> CheckStrings;
   1589   if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
   1590     return 2;
   1591 
   1592   // Open the file to check and add it to SourceMgr.
   1593   ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
   1594       MemoryBuffer::getFileOrSTDIN(InputFilename);
   1595   if (std::error_code EC = InputFileOrErr.getError()) {
   1596     errs() << "Could not open input file '" << InputFilename
   1597            << "': " << EC.message() << '\n';
   1598     return 2;
   1599   }
   1600   MemoryBuffer &InputFile = *InputFileOrErr.get();
   1601 
   1602   if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
   1603     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
   1604     DumpCommandLine(argc, argv);
   1605     return 2;
   1606   }
   1607 
   1608   SmallString<4096> InputFileBuffer;
   1609   StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer);
   1610 
   1611   SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
   1612                             InputFileText, InputFile.getBufferIdentifier()),
   1613                         SMLoc());
   1614 
   1615   int ExitCode = CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
   1616   if (ExitCode == 1 && DumpInputOnFailure)
   1617     errs() << "Full input was:\n<<<<<<\n" << InputFileText << "\n>>>>>>\n";
   1618 
   1619   return ExitCode;
   1620 }
   1621