Home | History | Annotate | Download | only in FileCheck
      1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // FileCheck does a line-by-line check of a file that validates whether it
     11 // contains the expected content.  This is useful for regression tests etc.
     12 //
     13 // This program exits with an error status of 2 on error, exit status of 0 if
     14 // the file matched the expected contents, and exit status of 1 if it did not
     15 // contain the expected contents.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/ADT/OwningPtr.h"
     20 #include "llvm/ADT/SmallString.h"
     21 #include "llvm/ADT/StringExtras.h"
     22 #include "llvm/ADT/StringMap.h"
     23 #include "llvm/Support/CommandLine.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/PrettyStackTrace.h"
     26 #include "llvm/Support/Regex.h"
     27 #include "llvm/Support/Signals.h"
     28 #include "llvm/Support/SourceMgr.h"
     29 #include "llvm/Support/raw_ostream.h"
     30 #include "llvm/Support/system_error.h"
     31 #include <algorithm>
     32 #include <map>
     33 #include <string>
     34 #include <vector>
     35 using namespace llvm;
     36 
     37 static cl::opt<std::string>
     38 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
     39 
     40 static cl::opt<std::string>
     41 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
     42               cl::init("-"), cl::value_desc("filename"));
     43 
     44 static cl::opt<std::string>
     45 CheckPrefix("check-prefix", cl::init("CHECK"),
     46             cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
     47 
     48 static cl::opt<bool>
     49 NoCanonicalizeWhiteSpace("strict-whitespace",
     50               cl::desc("Do not treat all horizontal whitespace as equivalent"));
     51 
     52 //===----------------------------------------------------------------------===//
     53 // Pattern Handling Code.
     54 //===----------------------------------------------------------------------===//
     55 
     56 class Pattern {
     57   SMLoc PatternLoc;
     58 
     59   /// MatchEOF - When set, this pattern only matches the end of file. This is
     60   /// used for trailing CHECK-NOTs.
     61   bool MatchEOF;
     62 
     63   /// FixedStr - If non-empty, this pattern is a fixed string match with the
     64   /// specified fixed string.
     65   StringRef FixedStr;
     66 
     67   /// RegEx - If non-empty, this is a regex pattern.
     68   std::string RegExStr;
     69 
     70   /// \brief Contains the number of line this pattern is in.
     71   unsigned LineNumber;
     72 
     73   /// VariableUses - Entries in this vector map to uses of a variable in the
     74   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
     75   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
     76   /// value of bar at offset 3.
     77   std::vector<std::pair<StringRef, unsigned> > VariableUses;
     78 
     79   /// VariableDefs - Maps definitions of variables to their parenthesized
     80   /// capture numbers.
     81   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
     82   std::map<StringRef, unsigned> VariableDefs;
     83 
     84 public:
     85 
     86   Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { }
     87 
     88   /// ParsePattern - Parse the given string into the Pattern.  SM provides the
     89   /// SourceMgr used for error reports, and LineNumber is the line number in
     90   /// the input file from which the pattern string was read.
     91   /// Returns true in case of an error, false otherwise.
     92   bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);
     93 
     94   /// Match - Match the pattern string against the input buffer Buffer.  This
     95   /// returns the position that is matched or npos if there is no match.  If
     96   /// there is a match, the size of the matched string is returned in MatchLen.
     97   ///
     98   /// The VariableTable StringMap provides the current values of filecheck
     99   /// variables and is updated if this match defines new values.
    100   size_t Match(StringRef Buffer, size_t &MatchLen,
    101                StringMap<StringRef> &VariableTable) const;
    102 
    103   /// PrintFailureInfo - Print additional information about a failure to match
    104   /// involving this pattern.
    105   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    106                         const StringMap<StringRef> &VariableTable) const;
    107 
    108 private:
    109   static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
    110   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
    111   void AddBackrefToRegEx(unsigned BackrefNum);
    112 
    113   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
    114   /// matching this pattern at the start of \arg Buffer; a distance of zero
    115   /// should correspond to a perfect match.
    116   unsigned ComputeMatchDistance(StringRef Buffer,
    117                                const StringMap<StringRef> &VariableTable) const;
    118 
    119   /// \brief Evaluates expression and stores the result to \p Value.
    120   /// \return true on success. false when the expression has invalid syntax.
    121   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
    122 
    123   /// \brief Finds the closing sequence of a regex variable usage or
    124   /// definition. Str has to point in the beginning of the definition
    125   /// (right after the opening sequence).
    126   /// \return offset of the closing sequence within Str, or npos if it was not
    127   /// found.
    128   size_t FindRegexVarEnd(StringRef Str);
    129 };
    130 
    131 
    132 bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
    133                            unsigned LineNumber) {
    134   this->LineNumber = LineNumber;
    135   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
    136 
    137   // Ignore trailing whitespace.
    138   while (!PatternStr.empty() &&
    139          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    140     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
    141 
    142   // Check that there is something on the line.
    143   if (PatternStr.empty()) {
    144     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
    145                     "found empty check string with prefix '" +
    146                     CheckPrefix+":'");
    147     return true;
    148   }
    149 
    150   // Check to see if this is a fixed string, or if it has regex pieces.
    151   if (PatternStr.size() < 2 ||
    152       (PatternStr.find("{{") == StringRef::npos &&
    153        PatternStr.find("[[") == StringRef::npos)) {
    154     FixedStr = PatternStr;
    155     return false;
    156   }
    157 
    158   // Paren value #0 is for the fully matched string.  Any new parenthesized
    159   // values add from there.
    160   unsigned CurParen = 1;
    161 
    162   // Otherwise, there is at least one regex piece.  Build up the regex pattern
    163   // by escaping scary characters in fixed strings, building up one big regex.
    164   while (!PatternStr.empty()) {
    165     // RegEx matches.
    166     if (PatternStr.startswith("{{")) {
    167       // This is the start of a regex match.  Scan for the }}.
    168       size_t End = PatternStr.find("}}");
    169       if (End == StringRef::npos) {
    170         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    171                         SourceMgr::DK_Error,
    172                         "found start of regex string with no end '}}'");
    173         return true;
    174       }
    175 
    176       // Enclose {{}} patterns in parens just like [[]] even though we're not
    177       // capturing the result for any purpose.  This is required in case the
    178       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
    179       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
    180       RegExStr += '(';
    181       ++CurParen;
    182 
    183       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
    184         return true;
    185       RegExStr += ')';
    186 
    187       PatternStr = PatternStr.substr(End+2);
    188       continue;
    189     }
    190 
    191     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
    192     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
    193     // second form is [[foo]] which is a reference to foo.  The variable name
    194     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
    195     // it.  This is to catch some common errors.
    196     if (PatternStr.startswith("[[")) {
    197       // Find the closing bracket pair ending the match.  End is going to be an
    198       // offset relative to the beginning of the match string.
    199       size_t End = FindRegexVarEnd(PatternStr.substr(2));
    200 
    201       if (End == StringRef::npos) {
    202         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
    203                         SourceMgr::DK_Error,
    204                         "invalid named regex reference, no ]] found");
    205         return true;
    206       }
    207 
    208       StringRef MatchStr = PatternStr.substr(2, End);
    209       PatternStr = PatternStr.substr(End+4);
    210 
    211       // Get the regex name (e.g. "foo").
    212       size_t NameEnd = MatchStr.find(':');
    213       StringRef Name = MatchStr.substr(0, NameEnd);
    214 
    215       if (Name.empty()) {
    216         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    217                         "invalid name in named regex: empty name");
    218         return true;
    219       }
    220 
    221       // Verify that the name/expression is well formed. FileCheck currently
    222       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
    223       // is relaxed, more strict check is performed in \c EvaluateExpression.
    224       bool IsExpression = false;
    225       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    226         if (i == 0 && Name[i] == '@') {
    227           if (NameEnd != StringRef::npos) {
    228             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    229                             SourceMgr::DK_Error,
    230                             "invalid name in named regex definition");
    231             return true;
    232           }
    233           IsExpression = true;
    234           continue;
    235         }
    236         if (Name[i] != '_' && !isalnum(Name[i]) &&
    237             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
    238           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
    239                           SourceMgr::DK_Error, "invalid name in named regex");
    240           return true;
    241         }
    242       }
    243 
    244       // Name can't start with a digit.
    245       if (isdigit(static_cast<unsigned char>(Name[0]))) {
    246         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
    247                         "invalid name in named regex");
    248         return true;
    249       }
    250 
    251       // Handle [[foo]].
    252       if (NameEnd == StringRef::npos) {
    253         // Handle variables that were defined earlier on the same line by
    254         // emitting a backreference.
    255         if (VariableDefs.find(Name) != VariableDefs.end()) {
    256           unsigned VarParenNum = VariableDefs[Name];
    257           if (VarParenNum < 1 || VarParenNum > 9) {
    258             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
    259                             SourceMgr::DK_Error,
    260                             "Can't back-reference more than 9 variables");
    261             return true;
    262           }
    263           AddBackrefToRegEx(VarParenNum);
    264         } else {
    265           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
    266         }
    267         continue;
    268       }
    269 
    270       // Handle [[foo:.*]].
    271       VariableDefs[Name] = CurParen;
    272       RegExStr += '(';
    273       ++CurParen;
    274 
    275       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
    276         return true;
    277 
    278       RegExStr += ')';
    279     }
    280 
    281     // Handle fixed string matches.
    282     // Find the end, which is the start of the next regex.
    283     size_t FixedMatchEnd = PatternStr.find("{{");
    284     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    285     AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
    286     PatternStr = PatternStr.substr(FixedMatchEnd);
    287   }
    288 
    289   return false;
    290 }
    291 
    292 void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
    293   // Add the characters from FixedStr to the regex, escaping as needed.  This
    294   // avoids "leaning toothpicks" in common patterns.
    295   for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) {
    296     switch (FixedStr[i]) {
    297     // These are the special characters matched in "p_ere_exp".
    298     case '(':
    299     case ')':
    300     case '^':
    301     case '$':
    302     case '|':
    303     case '*':
    304     case '+':
    305     case '?':
    306     case '.':
    307     case '[':
    308     case '\\':
    309     case '{':
    310       TheStr += '\\';
    311       // FALL THROUGH.
    312     default:
    313       TheStr += FixedStr[i];
    314       break;
    315     }
    316   }
    317 }
    318 
    319 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
    320                               SourceMgr &SM) {
    321   Regex R(RS);
    322   std::string Error;
    323   if (!R.isValid(Error)) {
    324     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
    325                     "invalid regex: " + Error);
    326     return true;
    327   }
    328 
    329   RegExStr += RS.str();
    330   CurParen += R.getNumMatches();
    331   return false;
    332 }
    333 
    334 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
    335   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
    336   std::string Backref = std::string("\\") +
    337                         std::string(1, '0' + BackrefNum);
    338   RegExStr += Backref;
    339 }
    340 
    341 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
    342   // The only supported expression is @LINE([\+-]\d+)?
    343   if (!Expr.startswith("@LINE"))
    344     return false;
    345   Expr = Expr.substr(StringRef("@LINE").size());
    346   int Offset = 0;
    347   if (!Expr.empty()) {
    348     if (Expr[0] == '+')
    349       Expr = Expr.substr(1);
    350     else if (Expr[0] != '-')
    351       return false;
    352     if (Expr.getAsInteger(10, Offset))
    353       return false;
    354   }
    355   Value = llvm::itostr(LineNumber + Offset);
    356   return true;
    357 }
    358 
    359 /// Match - Match the pattern string against the input buffer Buffer.  This
    360 /// returns the position that is matched or npos if there is no match.  If
    361 /// there is a match, the size of the matched string is returned in MatchLen.
    362 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
    363                       StringMap<StringRef> &VariableTable) const {
    364   // If this is the EOF pattern, match it immediately.
    365   if (MatchEOF) {
    366     MatchLen = 0;
    367     return Buffer.size();
    368   }
    369 
    370   // If this is a fixed string pattern, just match it now.
    371   if (!FixedStr.empty()) {
    372     MatchLen = FixedStr.size();
    373     return Buffer.find(FixedStr);
    374   }
    375 
    376   // Regex match.
    377 
    378   // If there are variable uses, we need to create a temporary string with the
    379   // actual value.
    380   StringRef RegExToMatch = RegExStr;
    381   std::string TmpStr;
    382   if (!VariableUses.empty()) {
    383     TmpStr = RegExStr;
    384 
    385     unsigned InsertOffset = 0;
    386     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    387       std::string Value;
    388 
    389       if (VariableUses[i].first[0] == '@') {
    390         if (!EvaluateExpression(VariableUses[i].first, Value))
    391           return StringRef::npos;
    392       } else {
    393         StringMap<StringRef>::iterator it =
    394           VariableTable.find(VariableUses[i].first);
    395         // If the variable is undefined, return an error.
    396         if (it == VariableTable.end())
    397           return StringRef::npos;
    398 
    399         // Look up the value and escape it so that we can plop it into the regex.
    400         AddFixedStringToRegEx(it->second, Value);
    401       }
    402 
    403       // Plop it into the regex at the adjusted offset.
    404       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
    405                     Value.begin(), Value.end());
    406       InsertOffset += Value.size();
    407     }
    408 
    409     // Match the newly constructed regex.
    410     RegExToMatch = TmpStr;
    411   }
    412 
    413 
    414   SmallVector<StringRef, 4> MatchInfo;
    415   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
    416     return StringRef::npos;
    417 
    418   // Successful regex match.
    419   assert(!MatchInfo.empty() && "Didn't get any match");
    420   StringRef FullMatch = MatchInfo[0];
    421 
    422   // If this defines any variables, remember their values.
    423   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
    424                                                      E = VariableDefs.end();
    425        I != E; ++I) {
    426     assert(I->second < MatchInfo.size() && "Internal paren error");
    427     VariableTable[I->first] = MatchInfo[I->second];
    428   }
    429 
    430   MatchLen = FullMatch.size();
    431   return FullMatch.data()-Buffer.data();
    432 }
    433 
    434 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
    435                               const StringMap<StringRef> &VariableTable) const {
    436   // Just compute the number of matching characters. For regular expressions, we
    437   // just compare against the regex itself and hope for the best.
    438   //
    439   // FIXME: One easy improvement here is have the regex lib generate a single
    440   // example regular expression which matches, and use that as the example
    441   // string.
    442   StringRef ExampleString(FixedStr);
    443   if (ExampleString.empty())
    444     ExampleString = RegExStr;
    445 
    446   // Only compare up to the first line in the buffer, or the string size.
    447   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
    448   BufferPrefix = BufferPrefix.split('\n').first;
    449   return BufferPrefix.edit_distance(ExampleString);
    450 }
    451 
    452 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
    453                                const StringMap<StringRef> &VariableTable) const{
    454   // If this was a regular expression using variables, print the current
    455   // variable values.
    456   if (!VariableUses.empty()) {
    457     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
    458       SmallString<256> Msg;
    459       raw_svector_ostream OS(Msg);
    460       StringRef Var = VariableUses[i].first;
    461       if (Var[0] == '@') {
    462         std::string Value;
    463         if (EvaluateExpression(Var, Value)) {
    464           OS << "with expression \"";
    465           OS.write_escaped(Var) << "\" equal to \"";
    466           OS.write_escaped(Value) << "\"";
    467         } else {
    468           OS << "uses incorrect expression \"";
    469           OS.write_escaped(Var) << "\"";
    470         }
    471       } else {
    472         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
    473 
    474         // Check for undefined variable references.
    475         if (it == VariableTable.end()) {
    476           OS << "uses undefined variable \"";
    477           OS.write_escaped(Var) << "\"";
    478         } else {
    479           OS << "with variable \"";
    480           OS.write_escaped(Var) << "\" equal to \"";
    481           OS.write_escaped(it->second) << "\"";
    482         }
    483       }
    484 
    485       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    486                       OS.str());
    487     }
    488   }
    489 
    490   // Attempt to find the closest/best fuzzy match.  Usually an error happens
    491   // because some string in the output didn't exactly match. In these cases, we
    492   // would like to show the user a best guess at what "should have" matched, to
    493   // save them having to actually check the input manually.
    494   size_t NumLinesForward = 0;
    495   size_t Best = StringRef::npos;
    496   double BestQuality = 0;
    497 
    498   // Use an arbitrary 4k limit on how far we will search.
    499   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
    500     if (Buffer[i] == '\n')
    501       ++NumLinesForward;
    502 
    503     // Patterns have leading whitespace stripped, so skip whitespace when
    504     // looking for something which looks like a pattern.
    505     if (Buffer[i] == ' ' || Buffer[i] == '\t')
    506       continue;
    507 
    508     // Compute the "quality" of this match as an arbitrary combination of the
    509     // match distance and the number of lines skipped to get to this match.
    510     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
    511     double Quality = Distance + (NumLinesForward / 100.);
    512 
    513     if (Quality < BestQuality || Best == StringRef::npos) {
    514       Best = i;
    515       BestQuality = Quality;
    516     }
    517   }
    518 
    519   // Print the "possible intended match here" line if we found something
    520   // reasonable and not equal to what we showed in the "scanning from here"
    521   // line.
    522   if (Best && Best != StringRef::npos && BestQuality < 50) {
    523       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
    524                       SourceMgr::DK_Note, "possible intended match here");
    525 
    526     // FIXME: If we wanted to be really friendly we would show why the match
    527     // failed, as it can be hard to spot simple one character differences.
    528   }
    529 }
    530 
    531 size_t Pattern::FindRegexVarEnd(StringRef Str) {
    532   // Offset keeps track of the current offset within the input Str
    533   size_t Offset = 0;
    534   // [...] Nesting depth
    535   size_t BracketDepth = 0;
    536 
    537   while (!Str.empty()) {
    538     if (Str.startswith("]]") && BracketDepth == 0)
    539       return Offset;
    540     if (Str[0] == '\\') {
    541       // Backslash escapes the next char within regexes, so skip them both.
    542       Str = Str.substr(2);
    543       Offset += 2;
    544     } else {
    545       switch (Str[0]) {
    546         default:
    547           break;
    548         case '[':
    549           BracketDepth++;
    550           break;
    551         case ']':
    552           assert(BracketDepth > 0 && "Invalid regex");
    553           BracketDepth--;
    554           break;
    555       }
    556       Str = Str.substr(1);
    557       Offset++;
    558     }
    559   }
    560 
    561   return StringRef::npos;
    562 }
    563 
    564 
    565 //===----------------------------------------------------------------------===//
    566 // Check Strings.
    567 //===----------------------------------------------------------------------===//
    568 
    569 /// CheckString - This is a check that we found in the input file.
    570 struct CheckString {
    571   /// Pat - The pattern to match.
    572   Pattern Pat;
    573 
    574   /// Loc - The location in the match file that the check string was specified.
    575   SMLoc Loc;
    576 
    577   /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
    578   /// to a CHECK: directive.
    579   bool IsCheckNext;
    580 
    581   /// NotStrings - These are all of the strings that are disallowed from
    582   /// occurring between this match string and the previous one (or start of
    583   /// file).
    584   std::vector<std::pair<SMLoc, Pattern> > NotStrings;
    585 
    586   CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
    587     : Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
    588 };
    589 
    590 /// Canonicalize whitespaces in the input file. Line endings are replaced
    591 /// with UNIX-style '\n'.
    592 ///
    593 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
    594 /// characters to a single space.
    595 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
    596                                            bool PreserveHorizontal) {
    597   SmallString<128> NewFile;
    598   NewFile.reserve(MB->getBufferSize());
    599 
    600   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
    601        Ptr != End; ++Ptr) {
    602     // Eliminate trailing dosish \r.
    603     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
    604       continue;
    605     }
    606 
    607     // If current char is not a horizontal whitespace or if horizontal
    608     // whitespace canonicalization is disabled, dump it to output as is.
    609     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
    610       NewFile.push_back(*Ptr);
    611       continue;
    612     }
    613 
    614     // Otherwise, add one space and advance over neighboring space.
    615     NewFile.push_back(' ');
    616     while (Ptr+1 != End &&
    617            (Ptr[1] == ' ' || Ptr[1] == '\t'))
    618       ++Ptr;
    619   }
    620 
    621   // Free the old buffer and return a new one.
    622   MemoryBuffer *MB2 =
    623     MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
    624 
    625   delete MB;
    626   return MB2;
    627 }
    628 
    629 
    630 /// ReadCheckFile - Read the check file, which specifies the sequence of
    631 /// expected strings.  The strings are added to the CheckStrings vector.
    632 /// Returns true in case of an error, false otherwise.
    633 static bool ReadCheckFile(SourceMgr &SM,
    634                           std::vector<CheckString> &CheckStrings) {
    635   OwningPtr<MemoryBuffer> File;
    636   if (error_code ec =
    637         MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) {
    638     errs() << "Could not open check file '" << CheckFilename << "': "
    639            << ec.message() << '\n';
    640     return true;
    641   }
    642   MemoryBuffer *F = File.take();
    643 
    644   // If we want to canonicalize whitespace, strip excess whitespace from the
    645   // buffer containing the CHECK lines. Remove DOS style line endings.
    646   F = CanonicalizeInputFile(F, NoCanonicalizeWhiteSpace);
    647 
    648   SM.AddNewSourceBuffer(F, SMLoc());
    649 
    650   // Find all instances of CheckPrefix followed by : in the file.
    651   StringRef Buffer = F->getBuffer();
    652   std::vector<std::pair<SMLoc, Pattern> > NotMatches;
    653 
    654   // LineNumber keeps track of the line on which CheckPrefix instances are
    655   // found.
    656   unsigned LineNumber = 1;
    657 
    658   while (1) {
    659     // See if Prefix occurs in the memory buffer.
    660     size_t PrefixLoc = Buffer.find(CheckPrefix);
    661     // If we didn't find a match, we're done.
    662     if (PrefixLoc == StringRef::npos)
    663       break;
    664 
    665     LineNumber += Buffer.substr(0, PrefixLoc).count('\n');
    666 
    667     Buffer = Buffer.substr(PrefixLoc);
    668 
    669     const char *CheckPrefixStart = Buffer.data();
    670 
    671     // When we find a check prefix, keep track of whether we find CHECK: or
    672     // CHECK-NEXT:
    673     bool IsCheckNext = false, IsCheckNot = false;
    674 
    675     // Verify that the : is present after the prefix.
    676     if (Buffer[CheckPrefix.size()] == ':') {
    677       Buffer = Buffer.substr(CheckPrefix.size()+1);
    678     } else if (Buffer.size() > CheckPrefix.size()+6 &&
    679                memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
    680       Buffer = Buffer.substr(CheckPrefix.size()+6);
    681       IsCheckNext = true;
    682     } else if (Buffer.size() > CheckPrefix.size()+5 &&
    683                memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
    684       Buffer = Buffer.substr(CheckPrefix.size()+5);
    685       IsCheckNot = true;
    686     } else {
    687       Buffer = Buffer.substr(1);
    688       continue;
    689     }
    690 
    691     // Okay, we found the prefix, yay.  Remember the rest of the line, but
    692     // ignore leading and trailing whitespace.
    693     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
    694 
    695     // Scan ahead to the end of line.
    696     size_t EOL = Buffer.find_first_of("\n\r");
    697 
    698     // Remember the location of the start of the pattern, for diagnostics.
    699     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
    700 
    701     // Parse the pattern.
    702     Pattern P;
    703     if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber))
    704       return true;
    705 
    706     Buffer = Buffer.substr(EOL);
    707 
    708     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
    709     if (IsCheckNext && CheckStrings.empty()) {
    710       SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
    711                       SourceMgr::DK_Error,
    712                       "found '"+CheckPrefix+"-NEXT:' without previous '"+
    713                       CheckPrefix+ ": line");
    714       return true;
    715     }
    716 
    717     // Handle CHECK-NOT.
    718     if (IsCheckNot) {
    719       NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
    720                                           P));
    721       continue;
    722     }
    723 
    724     // Okay, add the string we captured to the output vector and move on.
    725     CheckStrings.push_back(CheckString(P,
    726                                        PatternLoc,
    727                                        IsCheckNext));
    728     std::swap(NotMatches, CheckStrings.back().NotStrings);
    729   }
    730 
    731   // Add an EOF pattern for any trailing CHECK-NOTs.
    732   if (!NotMatches.empty()) {
    733     CheckStrings.push_back(CheckString(Pattern(true),
    734                                        SMLoc::getFromPointer(Buffer.data()),
    735                                        false));
    736     std::swap(NotMatches, CheckStrings.back().NotStrings);
    737   }
    738 
    739   if (CheckStrings.empty()) {
    740     errs() << "error: no check strings found with prefix '" << CheckPrefix
    741            << ":'\n";
    742     return true;
    743   }
    744 
    745   return false;
    746 }
    747 
    748 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
    749                              StringRef Buffer,
    750                              StringMap<StringRef> &VariableTable) {
    751   // Otherwise, we have an error, emit an error message.
    752   SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error,
    753                   "expected string not found in input");
    754 
    755   // Print the "scanning from here" line.  If the current position is at the
    756   // end of a line, advance to the start of the next line.
    757   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
    758 
    759   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
    760                   "scanning from here");
    761 
    762   // Allow the pattern to print additional information if desired.
    763   CheckStr.Pat.PrintFailureInfo(SM, Buffer, VariableTable);
    764 }
    765 
    766 /// CountNumNewlinesBetween - Count the number of newlines in the specified
    767 /// range.
    768 static unsigned CountNumNewlinesBetween(StringRef Range) {
    769   unsigned NumNewLines = 0;
    770   while (1) {
    771     // Scan for newline.
    772     Range = Range.substr(Range.find_first_of("\n\r"));
    773     if (Range.empty()) return NumNewLines;
    774 
    775     ++NumNewLines;
    776 
    777     // Handle \n\r and \r\n as a single newline.
    778     if (Range.size() > 1 &&
    779         (Range[1] == '\n' || Range[1] == '\r') &&
    780         (Range[0] != Range[1]))
    781       Range = Range.substr(1);
    782     Range = Range.substr(1);
    783   }
    784 }
    785 
    786 int main(int argc, char **argv) {
    787   sys::PrintStackTraceOnErrorSignal();
    788   PrettyStackTraceProgram X(argc, argv);
    789   cl::ParseCommandLineOptions(argc, argv);
    790 
    791   SourceMgr SM;
    792 
    793   // Read the expected strings from the check file.
    794   std::vector<CheckString> CheckStrings;
    795   if (ReadCheckFile(SM, CheckStrings))
    796     return 2;
    797 
    798   // Open the file to check and add it to SourceMgr.
    799   OwningPtr<MemoryBuffer> File;
    800   if (error_code ec =
    801         MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {
    802     errs() << "Could not open input file '" << InputFilename << "': "
    803            << ec.message() << '\n';
    804     return 2;
    805   }
    806   MemoryBuffer *F = File.take();
    807 
    808   if (F->getBufferSize() == 0) {
    809     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
    810     return 2;
    811   }
    812 
    813   // Remove duplicate spaces in the input file if requested.
    814   // Remove DOS style line endings.
    815   F = CanonicalizeInputFile(F, NoCanonicalizeWhiteSpace);
    816 
    817   SM.AddNewSourceBuffer(F, SMLoc());
    818 
    819   /// VariableTable - This holds all the current filecheck variables.
    820   StringMap<StringRef> VariableTable;
    821 
    822   // Check that we have all of the expected strings, in order, in the input
    823   // file.
    824   StringRef Buffer = F->getBuffer();
    825 
    826   const char *LastMatch = Buffer.data();
    827 
    828   for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
    829     const CheckString &CheckStr = CheckStrings[StrNo];
    830 
    831     StringRef SearchFrom = Buffer;
    832 
    833     // Find StrNo in the file.
    834     size_t MatchLen = 0;
    835     size_t MatchPos = CheckStr.Pat.Match(Buffer, MatchLen, VariableTable);
    836     Buffer = Buffer.substr(MatchPos);
    837 
    838     // If we didn't find a match, reject the input.
    839     if (MatchPos == StringRef::npos) {
    840       PrintCheckFailed(SM, CheckStr, SearchFrom, VariableTable);
    841       return 1;
    842     }
    843 
    844     StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);
    845 
    846     // If this check is a "CHECK-NEXT", verify that the previous match was on
    847     // the previous line (i.e. that there is one newline between them).
    848     if (CheckStr.IsCheckNext) {
    849       // Count the number of newlines between the previous match and this one.
    850       assert(LastMatch != F->getBufferStart() &&
    851              "CHECK-NEXT can't be the first check in a file");
    852 
    853       unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
    854       if (NumNewLines == 0) {
    855         SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error,
    856                     CheckPrefix+"-NEXT: is on the same line as previous match");
    857         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
    858                         SourceMgr::DK_Note, "'next' match was here");
    859         SM.PrintMessage(SMLoc::getFromPointer(LastMatch), SourceMgr::DK_Note,
    860                         "previous match was here");
    861         return 1;
    862       }
    863 
    864       if (NumNewLines != 1) {
    865         SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error, CheckPrefix+
    866                         "-NEXT: is not on the line after the previous match");
    867         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
    868                         SourceMgr::DK_Note, "'next' match was here");
    869         SM.PrintMessage(SMLoc::getFromPointer(LastMatch), SourceMgr::DK_Note,
    870                         "previous match was here");
    871         return 1;
    872       }
    873     }
    874 
    875     // If this match had "not strings", verify that they don't exist in the
    876     // skipped region.
    877     for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size();
    878          ChunkNo != e; ++ChunkNo) {
    879       size_t MatchLen = 0;
    880       size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion,
    881                                                              MatchLen,
    882                                                              VariableTable);
    883       if (Pos == StringRef::npos) continue;
    884 
    885       SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), SourceMgr::DK_Error,
    886                       CheckPrefix+"-NOT: string occurred!");
    887       SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, SourceMgr::DK_Note,
    888                       CheckPrefix+"-NOT: pattern specified here");
    889       return 1;
    890     }
    891 
    892 
    893     // Otherwise, everything is good.  Step over the matched text and remember
    894     // the position after the match as the end of the last match.
    895     Buffer = Buffer.substr(MatchLen);
    896     LastMatch = Buffer.data();
    897   }
    898 
    899   return 0;
    900 }
    901