1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an exit status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/InitLLVM.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/SourceMgr.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <list> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> CheckPrefixes( 46 "check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::list<std::string> GlobalDefines("D", cl::Prefix, 66 cl::desc("Define a variable to be used in capture patterns."), 67 cl::value_desc("VAR=VALUE")); 68 69 static cl::opt<bool> AllowEmptyInput( 70 "allow-empty", cl::init(false), 71 cl::desc("Allow the input file to be empty. This is useful when making\n" 72 "checks that some error message does not occur, for example.")); 73 74 static cl::opt<bool> MatchFullLines( 75 "match-full-lines", cl::init(false), 76 cl::desc("Require all positive matches to cover an entire input line.\n" 77 "Allows leading and trailing whitespace if --strict-whitespace\n" 78 "is not also passed.")); 79 80 static cl::opt<bool> EnableVarScope( 81 "enable-var-scope", cl::init(false), 82 cl::desc("Enables scope for regex variables. Variables with names that\n" 83 "do not start with '$' will be reset at the beginning of\n" 84 "each CHECK-LABEL block.")); 85 86 static cl::opt<bool> AllowDeprecatedDagOverlap( 87 "allow-deprecated-dag-overlap", cl::init(false), 88 cl::desc("Enable overlapping among matches in a group of consecutive\n" 89 "CHECK-DAG directives. This option is deprecated and is only\n" 90 "provided for convenience as old tests are migrated to the new\n" 91 "non-overlapping CHECK-DAG implementation.\n")); 92 93 static cl::opt<bool> Verbose("v", cl::init(false), 94 cl::desc("Print directive pattern matches.\n")); 95 96 static cl::opt<bool> VerboseVerbose( 97 "vv", cl::init(false), 98 cl::desc("Print information helpful in diagnosing internal FileCheck\n" 99 "issues. Implies -v.\n")); 100 static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE"; 101 102 static cl::opt<bool> DumpInputOnFailure( 103 "dump-input-on-failure", cl::init(std::getenv(DumpInputEnv)), 104 cl::desc("Dump original input to stderr before failing.\n" 105 "The value can be also controlled using\n" 106 "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n")); 107 108 typedef cl::list<std::string>::const_iterator prefix_iterator; 109 110 //===----------------------------------------------------------------------===// 111 // Pattern Handling Code. 112 //===----------------------------------------------------------------------===// 113 114 namespace Check { 115 enum CheckType { 116 CheckNone = 0, 117 CheckPlain, 118 CheckNext, 119 CheckSame, 120 CheckNot, 121 CheckDAG, 122 CheckLabel, 123 CheckEmpty, 124 125 /// Indicates the pattern only matches the end of file. This is used for 126 /// trailing CHECK-NOTs. 127 CheckEOF, 128 129 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 130 CheckBadNot 131 }; 132 } 133 134 class Pattern { 135 SMLoc PatternLoc; 136 137 /// A fixed string to match as the pattern or empty if this pattern requires 138 /// a regex match. 139 StringRef FixedStr; 140 141 /// A regex string to match as the pattern or empty if this pattern requires 142 /// a fixed string to match. 143 std::string RegExStr; 144 145 /// Entries in this vector map to uses of a variable in the pattern, e.g. 146 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 147 /// we'll get an entry in this vector that tells us to insert the value of 148 /// bar at offset 3. 149 std::vector<std::pair<StringRef, unsigned>> VariableUses; 150 151 /// Maps definitions of variables to their parenthesized capture numbers. 152 /// 153 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 154 /// 1. 155 std::map<StringRef, unsigned> VariableDefs; 156 157 Check::CheckType CheckTy; 158 159 /// Contains the number of line this pattern is in. 160 unsigned LineNumber; 161 162 public: 163 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 164 165 /// Returns the location in source code. 166 SMLoc getLoc() const { return PatternLoc; } 167 168 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 169 unsigned LineNumber); 170 size_t Match(StringRef Buffer, size_t &MatchLen, 171 StringMap<StringRef> &VariableTable) const; 172 void PrintVariableUses(const SourceMgr &SM, StringRef Buffer, 173 const StringMap<StringRef> &VariableTable, 174 SMRange MatchRange = None) const; 175 void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 176 const StringMap<StringRef> &VariableTable) const; 177 178 bool hasVariable() const { 179 return !(VariableUses.empty() && VariableDefs.empty()); 180 } 181 182 Check::CheckType getCheckTy() const { return CheckTy; } 183 184 private: 185 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 186 void AddBackrefToRegEx(unsigned BackrefNum); 187 unsigned 188 ComputeMatchDistance(StringRef Buffer, 189 const StringMap<StringRef> &VariableTable) const; 190 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 191 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 192 }; 193 194 /// Parses the given string into the Pattern. 195 /// 196 /// \p Prefix provides which prefix is being matched, \p SM provides the 197 /// SourceMgr used for error reports, and \p LineNumber is the line number in 198 /// the input file from which the pattern string was read. Returns true in 199 /// case of an error, false otherwise. 200 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 201 SourceMgr &SM, unsigned LineNumber) { 202 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 203 204 this->LineNumber = LineNumber; 205 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 206 207 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 208 // Ignore trailing whitespace. 209 while (!PatternStr.empty() && 210 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 211 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 212 213 // Check that there is something on the line. 214 if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { 215 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 216 "found empty check string with prefix '" + Prefix + ":'"); 217 return true; 218 } 219 220 if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { 221 SM.PrintMessage( 222 PatternLoc, SourceMgr::DK_Error, 223 "found non-empty check string for empty check with prefix '" + Prefix + 224 ":'"); 225 return true; 226 } 227 228 if (CheckTy == Check::CheckEmpty) { 229 RegExStr = "(\n$)"; 230 return false; 231 } 232 233 // Check to see if this is a fixed string, or if it has regex pieces. 234 if (!MatchFullLinesHere && 235 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 236 PatternStr.find("[[") == StringRef::npos))) { 237 FixedStr = PatternStr; 238 return false; 239 } 240 241 if (MatchFullLinesHere) { 242 RegExStr += '^'; 243 if (!NoCanonicalizeWhiteSpace) 244 RegExStr += " *"; 245 } 246 247 // Paren value #0 is for the fully matched string. Any new parenthesized 248 // values add from there. 249 unsigned CurParen = 1; 250 251 // Otherwise, there is at least one regex piece. Build up the regex pattern 252 // by escaping scary characters in fixed strings, building up one big regex. 253 while (!PatternStr.empty()) { 254 // RegEx matches. 255 if (PatternStr.startswith("{{")) { 256 // This is the start of a regex match. Scan for the }}. 257 size_t End = PatternStr.find("}}"); 258 if (End == StringRef::npos) { 259 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 260 SourceMgr::DK_Error, 261 "found start of regex string with no end '}}'"); 262 return true; 263 } 264 265 // Enclose {{}} patterns in parens just like [[]] even though we're not 266 // capturing the result for any purpose. This is required in case the 267 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 268 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 269 RegExStr += '('; 270 ++CurParen; 271 272 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 273 return true; 274 RegExStr += ')'; 275 276 PatternStr = PatternStr.substr(End + 2); 277 continue; 278 } 279 280 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 281 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 282 // second form is [[foo]] which is a reference to foo. The variable name 283 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 284 // it. This is to catch some common errors. 285 if (PatternStr.startswith("[[")) { 286 // Find the closing bracket pair ending the match. End is going to be an 287 // offset relative to the beginning of the match string. 288 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 289 290 if (End == StringRef::npos) { 291 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 292 SourceMgr::DK_Error, 293 "invalid named regex reference, no ]] found"); 294 return true; 295 } 296 297 StringRef MatchStr = PatternStr.substr(2, End); 298 PatternStr = PatternStr.substr(End + 4); 299 300 // Get the regex name (e.g. "foo"). 301 size_t NameEnd = MatchStr.find(':'); 302 StringRef Name = MatchStr.substr(0, NameEnd); 303 304 if (Name.empty()) { 305 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 306 "invalid name in named regex: empty name"); 307 return true; 308 } 309 310 // Verify that the name/expression is well formed. FileCheck currently 311 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 312 // is relaxed, more strict check is performed in \c EvaluateExpression. 313 bool IsExpression = false; 314 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 315 if (i == 0) { 316 if (Name[i] == '$') // Global vars start with '$' 317 continue; 318 if (Name[i] == '@') { 319 if (NameEnd != StringRef::npos) { 320 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 321 SourceMgr::DK_Error, 322 "invalid name in named regex definition"); 323 return true; 324 } 325 IsExpression = true; 326 continue; 327 } 328 } 329 if (Name[i] != '_' && !isalnum(Name[i]) && 330 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 331 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 332 SourceMgr::DK_Error, "invalid name in named regex"); 333 return true; 334 } 335 } 336 337 // Name can't start with a digit. 338 if (isdigit(static_cast<unsigned char>(Name[0]))) { 339 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 340 "invalid name in named regex"); 341 return true; 342 } 343 344 // Handle [[foo]]. 345 if (NameEnd == StringRef::npos) { 346 // Handle variables that were defined earlier on the same line by 347 // emitting a backreference. 348 if (VariableDefs.find(Name) != VariableDefs.end()) { 349 unsigned VarParenNum = VariableDefs[Name]; 350 if (VarParenNum < 1 || VarParenNum > 9) { 351 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 352 SourceMgr::DK_Error, 353 "Can't back-reference more than 9 variables"); 354 return true; 355 } 356 AddBackrefToRegEx(VarParenNum); 357 } else { 358 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 359 } 360 continue; 361 } 362 363 // Handle [[foo:.*]]. 364 VariableDefs[Name] = CurParen; 365 RegExStr += '('; 366 ++CurParen; 367 368 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 369 return true; 370 371 RegExStr += ')'; 372 } 373 374 // Handle fixed string matches. 375 // Find the end, which is the start of the next regex. 376 size_t FixedMatchEnd = PatternStr.find("{{"); 377 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 378 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 379 PatternStr = PatternStr.substr(FixedMatchEnd); 380 } 381 382 if (MatchFullLinesHere) { 383 if (!NoCanonicalizeWhiteSpace) 384 RegExStr += " *"; 385 RegExStr += '$'; 386 } 387 388 return false; 389 } 390 391 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 392 Regex R(RS); 393 std::string Error; 394 if (!R.isValid(Error)) { 395 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 396 "invalid regex: " + Error); 397 return true; 398 } 399 400 RegExStr += RS.str(); 401 CurParen += R.getNumMatches(); 402 return false; 403 } 404 405 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 406 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 407 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 408 RegExStr += Backref; 409 } 410 411 /// Evaluates expression and stores the result to \p Value. 412 /// 413 /// Returns true on success and false when the expression has invalid syntax. 414 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 415 // The only supported expression is @LINE([\+-]\d+)? 416 if (!Expr.startswith("@LINE")) 417 return false; 418 Expr = Expr.substr(StringRef("@LINE").size()); 419 int Offset = 0; 420 if (!Expr.empty()) { 421 if (Expr[0] == '+') 422 Expr = Expr.substr(1); 423 else if (Expr[0] != '-') 424 return false; 425 if (Expr.getAsInteger(10, Offset)) 426 return false; 427 } 428 Value = llvm::itostr(LineNumber + Offset); 429 return true; 430 } 431 432 /// Matches the pattern string against the input buffer \p Buffer 433 /// 434 /// This returns the position that is matched or npos if there is no match. If 435 /// there is a match, the size of the matched string is returned in \p 436 /// MatchLen. 437 /// 438 /// The \p VariableTable StringMap provides the current values of filecheck 439 /// variables and is updated if this match defines new values. 440 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 441 StringMap<StringRef> &VariableTable) const { 442 // If this is the EOF pattern, match it immediately. 443 if (CheckTy == Check::CheckEOF) { 444 MatchLen = 0; 445 return Buffer.size(); 446 } 447 448 // If this is a fixed string pattern, just match it now. 449 if (!FixedStr.empty()) { 450 MatchLen = FixedStr.size(); 451 return Buffer.find(FixedStr); 452 } 453 454 // Regex match. 455 456 // If there are variable uses, we need to create a temporary string with the 457 // actual value. 458 StringRef RegExToMatch = RegExStr; 459 std::string TmpStr; 460 if (!VariableUses.empty()) { 461 TmpStr = RegExStr; 462 463 unsigned InsertOffset = 0; 464 for (const auto &VariableUse : VariableUses) { 465 std::string Value; 466 467 if (VariableUse.first[0] == '@') { 468 if (!EvaluateExpression(VariableUse.first, Value)) 469 return StringRef::npos; 470 } else { 471 StringMap<StringRef>::iterator it = 472 VariableTable.find(VariableUse.first); 473 // If the variable is undefined, return an error. 474 if (it == VariableTable.end()) 475 return StringRef::npos; 476 477 // Look up the value and escape it so that we can put it into the regex. 478 Value += Regex::escape(it->second); 479 } 480 481 // Plop it into the regex at the adjusted offset. 482 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 483 Value.begin(), Value.end()); 484 InsertOffset += Value.size(); 485 } 486 487 // Match the newly constructed regex. 488 RegExToMatch = TmpStr; 489 } 490 491 SmallVector<StringRef, 4> MatchInfo; 492 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 493 return StringRef::npos; 494 495 // Successful regex match. 496 assert(!MatchInfo.empty() && "Didn't get any match"); 497 StringRef FullMatch = MatchInfo[0]; 498 499 // If this defines any variables, remember their values. 500 for (const auto &VariableDef : VariableDefs) { 501 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 502 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 503 } 504 505 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after 506 // the required preceding newline, which is consumed by the pattern in the 507 // case of CHECK-EMPTY but not CHECK-NEXT. 508 size_t MatchStartSkip = CheckTy == Check::CheckEmpty; 509 MatchLen = FullMatch.size() - MatchStartSkip; 510 return FullMatch.data() - Buffer.data() + MatchStartSkip; 511 } 512 513 514 /// Computes an arbitrary estimate for the quality of matching this pattern at 515 /// the start of \p Buffer; a distance of zero should correspond to a perfect 516 /// match. 517 unsigned 518 Pattern::ComputeMatchDistance(StringRef Buffer, 519 const StringMap<StringRef> &VariableTable) const { 520 // Just compute the number of matching characters. For regular expressions, we 521 // just compare against the regex itself and hope for the best. 522 // 523 // FIXME: One easy improvement here is have the regex lib generate a single 524 // example regular expression which matches, and use that as the example 525 // string. 526 StringRef ExampleString(FixedStr); 527 if (ExampleString.empty()) 528 ExampleString = RegExStr; 529 530 // Only compare up to the first line in the buffer, or the string size. 531 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 532 BufferPrefix = BufferPrefix.split('\n').first; 533 return BufferPrefix.edit_distance(ExampleString); 534 } 535 536 void Pattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer, 537 const StringMap<StringRef> &VariableTable, 538 SMRange MatchRange) const { 539 // If this was a regular expression using variables, print the current 540 // variable values. 541 if (!VariableUses.empty()) { 542 for (const auto &VariableUse : VariableUses) { 543 SmallString<256> Msg; 544 raw_svector_ostream OS(Msg); 545 StringRef Var = VariableUse.first; 546 if (Var[0] == '@') { 547 std::string Value; 548 if (EvaluateExpression(Var, Value)) { 549 OS << "with expression \""; 550 OS.write_escaped(Var) << "\" equal to \""; 551 OS.write_escaped(Value) << "\""; 552 } else { 553 OS << "uses incorrect expression \""; 554 OS.write_escaped(Var) << "\""; 555 } 556 } else { 557 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 558 559 // Check for undefined variable references. 560 if (it == VariableTable.end()) { 561 OS << "uses undefined variable \""; 562 OS.write_escaped(Var) << "\""; 563 } else { 564 OS << "with variable \""; 565 OS.write_escaped(Var) << "\" equal to \""; 566 OS.write_escaped(it->second) << "\""; 567 } 568 } 569 570 if (MatchRange.isValid()) 571 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(), 572 {MatchRange}); 573 else 574 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 575 SourceMgr::DK_Note, OS.str()); 576 } 577 } 578 } 579 580 void Pattern::PrintFuzzyMatch( 581 const SourceMgr &SM, StringRef Buffer, 582 const StringMap<StringRef> &VariableTable) const { 583 // Attempt to find the closest/best fuzzy match. Usually an error happens 584 // because some string in the output didn't exactly match. In these cases, we 585 // would like to show the user a best guess at what "should have" matched, to 586 // save them having to actually check the input manually. 587 size_t NumLinesForward = 0; 588 size_t Best = StringRef::npos; 589 double BestQuality = 0; 590 591 // Use an arbitrary 4k limit on how far we will search. 592 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 593 if (Buffer[i] == '\n') 594 ++NumLinesForward; 595 596 // Patterns have leading whitespace stripped, so skip whitespace when 597 // looking for something which looks like a pattern. 598 if (Buffer[i] == ' ' || Buffer[i] == '\t') 599 continue; 600 601 // Compute the "quality" of this match as an arbitrary combination of the 602 // match distance and the number of lines skipped to get to this match. 603 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 604 double Quality = Distance + (NumLinesForward / 100.); 605 606 if (Quality < BestQuality || Best == StringRef::npos) { 607 Best = i; 608 BestQuality = Quality; 609 } 610 } 611 612 // Print the "possible intended match here" line if we found something 613 // reasonable and not equal to what we showed in the "scanning from here" 614 // line. 615 if (Best && Best != StringRef::npos && BestQuality < 50) { 616 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 617 SourceMgr::DK_Note, "possible intended match here"); 618 619 // FIXME: If we wanted to be really friendly we would show why the match 620 // failed, as it can be hard to spot simple one character differences. 621 } 622 } 623 624 /// Finds the closing sequence of a regex variable usage or definition. 625 /// 626 /// \p Str has to point in the beginning of the definition (right after the 627 /// opening sequence). Returns the offset of the closing sequence within Str, 628 /// or npos if it was not found. 629 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 630 // Offset keeps track of the current offset within the input Str 631 size_t Offset = 0; 632 // [...] Nesting depth 633 size_t BracketDepth = 0; 634 635 while (!Str.empty()) { 636 if (Str.startswith("]]") && BracketDepth == 0) 637 return Offset; 638 if (Str[0] == '\\') { 639 // Backslash escapes the next char within regexes, so skip them both. 640 Str = Str.substr(2); 641 Offset += 2; 642 } else { 643 switch (Str[0]) { 644 default: 645 break; 646 case '[': 647 BracketDepth++; 648 break; 649 case ']': 650 if (BracketDepth == 0) { 651 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 652 SourceMgr::DK_Error, 653 "missing closing \"]\" for regex variable"); 654 exit(1); 655 } 656 BracketDepth--; 657 break; 658 } 659 Str = Str.substr(1); 660 Offset++; 661 } 662 } 663 664 return StringRef::npos; 665 } 666 667 //===----------------------------------------------------------------------===// 668 // Check Strings. 669 //===----------------------------------------------------------------------===// 670 671 /// A check that we found in the input file. 672 struct CheckString { 673 /// The pattern to match. 674 Pattern Pat; 675 676 /// Which prefix name this check matched. 677 StringRef Prefix; 678 679 /// The location in the match file that the check string was specified. 680 SMLoc Loc; 681 682 /// All of the strings that are disallowed from occurring between this match 683 /// string and the previous one (or start of file). 684 std::vector<Pattern> DagNotStrings; 685 686 CheckString(const Pattern &P, StringRef S, SMLoc L) 687 : Pat(P), Prefix(S), Loc(L) {} 688 689 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 690 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 691 692 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 693 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 694 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 695 const std::vector<const Pattern *> &NotStrings, 696 StringMap<StringRef> &VariableTable) const; 697 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 698 std::vector<const Pattern *> &NotStrings, 699 StringMap<StringRef> &VariableTable) const; 700 }; 701 702 /// Canonicalize whitespaces in the file. Line endings are replaced with 703 /// UNIX-style '\n'. 704 static StringRef CanonicalizeFile(MemoryBuffer &MB, 705 SmallVectorImpl<char> &OutputBuffer) { 706 OutputBuffer.reserve(MB.getBufferSize()); 707 708 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 709 Ptr != End; ++Ptr) { 710 // Eliminate trailing dosish \r. 711 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 712 continue; 713 } 714 715 // If current char is not a horizontal whitespace or if horizontal 716 // whitespace canonicalization is disabled, dump it to output as is. 717 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 718 OutputBuffer.push_back(*Ptr); 719 continue; 720 } 721 722 // Otherwise, add one space and advance over neighboring space. 723 OutputBuffer.push_back(' '); 724 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 725 ++Ptr; 726 } 727 728 // Add a null byte and then return all but that byte. 729 OutputBuffer.push_back('\0'); 730 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 731 } 732 733 static bool IsPartOfWord(char c) { 734 return (isalnum(c) || c == '-' || c == '_'); 735 } 736 737 // Get the size of the prefix extension. 738 static size_t CheckTypeSize(Check::CheckType Ty) { 739 switch (Ty) { 740 case Check::CheckNone: 741 case Check::CheckBadNot: 742 return 0; 743 744 case Check::CheckPlain: 745 return sizeof(":") - 1; 746 747 case Check::CheckNext: 748 return sizeof("-NEXT:") - 1; 749 750 case Check::CheckSame: 751 return sizeof("-SAME:") - 1; 752 753 case Check::CheckNot: 754 return sizeof("-NOT:") - 1; 755 756 case Check::CheckDAG: 757 return sizeof("-DAG:") - 1; 758 759 case Check::CheckLabel: 760 return sizeof("-LABEL:") - 1; 761 762 case Check::CheckEmpty: 763 return sizeof("-EMPTY:") - 1; 764 765 case Check::CheckEOF: 766 llvm_unreachable("Should not be using EOF size"); 767 } 768 769 llvm_unreachable("Bad check type"); 770 } 771 772 // Get a description of the type. 773 static std::string CheckTypeName(StringRef Prefix, Check::CheckType Ty) { 774 switch (Ty) { 775 case Check::CheckNone: 776 return "invalid"; 777 case Check::CheckPlain: 778 return Prefix; 779 case Check::CheckNext: 780 return Prefix.str() + "-NEXT"; 781 case Check::CheckSame: 782 return Prefix.str() + "-SAME"; 783 case Check::CheckNot: 784 return Prefix.str() + "-NOT"; 785 case Check::CheckDAG: 786 return Prefix.str() + "-DAG"; 787 case Check::CheckLabel: 788 return Prefix.str() + "-LABEL"; 789 case Check::CheckEmpty: 790 return Prefix.str() + "-EMPTY"; 791 case Check::CheckEOF: 792 return "implicit EOF"; 793 case Check::CheckBadNot: 794 return "bad NOT"; 795 } 796 llvm_unreachable("unknown CheckType"); 797 } 798 799 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 800 if (Buffer.size() <= Prefix.size()) 801 return Check::CheckNone; 802 803 char NextChar = Buffer[Prefix.size()]; 804 805 // Verify that the : is present after the prefix. 806 if (NextChar == ':') 807 return Check::CheckPlain; 808 809 if (NextChar != '-') 810 return Check::CheckNone; 811 812 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 813 if (Rest.startswith("NEXT:")) 814 return Check::CheckNext; 815 816 if (Rest.startswith("SAME:")) 817 return Check::CheckSame; 818 819 if (Rest.startswith("NOT:")) 820 return Check::CheckNot; 821 822 if (Rest.startswith("DAG:")) 823 return Check::CheckDAG; 824 825 if (Rest.startswith("LABEL:")) 826 return Check::CheckLabel; 827 828 if (Rest.startswith("EMPTY:")) 829 return Check::CheckEmpty; 830 831 // You can't combine -NOT with another suffix. 832 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 833 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 834 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 835 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 836 return Check::CheckBadNot; 837 838 return Check::CheckNone; 839 } 840 841 // From the given position, find the next character after the word. 842 static size_t SkipWord(StringRef Str, size_t Loc) { 843 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 844 ++Loc; 845 return Loc; 846 } 847 848 /// Search the buffer for the first prefix in the prefix regular expression. 849 /// 850 /// This searches the buffer using the provided regular expression, however it 851 /// enforces constraints beyond that: 852 /// 1) The found prefix must not be a suffix of something that looks like 853 /// a valid prefix. 854 /// 2) The found prefix must be followed by a valid check type suffix using \c 855 /// FindCheckType above. 856 /// 857 /// The first match of the regular expression to satisfy these two is returned, 858 /// otherwise an empty StringRef is returned to indicate failure. 859 /// 860 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 861 /// start at the beginning of the returned prefix, increment \p LineNumber for 862 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 863 /// check found by examining the suffix. 864 /// 865 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 866 /// is unspecified. 867 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 868 unsigned &LineNumber, 869 Check::CheckType &CheckTy) { 870 SmallVector<StringRef, 2> Matches; 871 872 while (!Buffer.empty()) { 873 // Find the first (longest) match using the RE. 874 if (!PrefixRE.match(Buffer, &Matches)) 875 // No match at all, bail. 876 return StringRef(); 877 878 StringRef Prefix = Matches[0]; 879 Matches.clear(); 880 881 assert(Prefix.data() >= Buffer.data() && 882 Prefix.data() < Buffer.data() + Buffer.size() && 883 "Prefix doesn't start inside of buffer!"); 884 size_t Loc = Prefix.data() - Buffer.data(); 885 StringRef Skipped = Buffer.substr(0, Loc); 886 Buffer = Buffer.drop_front(Loc); 887 LineNumber += Skipped.count('\n'); 888 889 // Check that the matched prefix isn't a suffix of some other check-like 890 // word. 891 // FIXME: This is a very ad-hoc check. it would be better handled in some 892 // other way. Among other things it seems hard to distinguish between 893 // intentional and unintentional uses of this feature. 894 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 895 // Now extract the type. 896 CheckTy = FindCheckType(Buffer, Prefix); 897 898 // If we've found a valid check type for this prefix, we're done. 899 if (CheckTy != Check::CheckNone) 900 return Prefix; 901 } 902 903 // If we didn't successfully find a prefix, we need to skip this invalid 904 // prefix and continue scanning. We directly skip the prefix that was 905 // matched and any additional parts of that check-like word. 906 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 907 } 908 909 // We ran out of buffer while skipping partial matches so give up. 910 return StringRef(); 911 } 912 913 /// Read the check file, which specifies the sequence of expected strings. 914 /// 915 /// The strings are added to the CheckStrings vector. Returns true in case of 916 /// an error, false otherwise. 917 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 918 std::vector<CheckString> &CheckStrings) { 919 std::vector<Pattern> ImplicitNegativeChecks; 920 for (const auto &PatternString : ImplicitCheckNot) { 921 // Create a buffer with fake command line content in order to display the 922 // command line option responsible for the specific implicit CHECK-NOT. 923 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 924 std::string Suffix = "'"; 925 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 926 Prefix + PatternString + Suffix, "command line"); 927 928 StringRef PatternInBuffer = 929 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 930 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 931 932 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 933 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 934 "IMPLICIT-CHECK", SM, 0); 935 } 936 937 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 938 939 // LineNumber keeps track of the line on which CheckPrefix instances are 940 // found. 941 unsigned LineNumber = 1; 942 943 while (1) { 944 Check::CheckType CheckTy; 945 946 // See if a prefix occurs in the memory buffer. 947 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 948 CheckTy); 949 if (UsedPrefix.empty()) 950 break; 951 assert(UsedPrefix.data() == Buffer.data() && 952 "Failed to move Buffer's start forward, or pointed prefix outside " 953 "of the buffer!"); 954 955 // Location to use for error messages. 956 const char *UsedPrefixStart = UsedPrefix.data(); 957 958 // Skip the buffer to the end. 959 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 960 961 // Complain about useful-looking but unsupported suffixes. 962 if (CheckTy == Check::CheckBadNot) { 963 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 964 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 965 return true; 966 } 967 968 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 969 // leading whitespace. 970 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 971 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 972 973 // Scan ahead to the end of line. 974 size_t EOL = Buffer.find_first_of("\n\r"); 975 976 // Remember the location of the start of the pattern, for diagnostics. 977 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 978 979 // Parse the pattern. 980 Pattern P(CheckTy); 981 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 982 return true; 983 984 // Verify that CHECK-LABEL lines do not define or use variables 985 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 986 SM.PrintMessage( 987 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 988 "found '" + UsedPrefix + "-LABEL:'" 989 " with variable definition or use"); 990 return true; 991 } 992 993 Buffer = Buffer.substr(EOL); 994 995 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 996 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 997 CheckTy == Check::CheckEmpty) && 998 CheckStrings.empty()) { 999 StringRef Type = CheckTy == Check::CheckNext 1000 ? "NEXT" 1001 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 1002 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 1003 SourceMgr::DK_Error, 1004 "found '" + UsedPrefix + "-" + Type + 1005 "' without previous '" + UsedPrefix + ": line"); 1006 return true; 1007 } 1008 1009 // Handle CHECK-DAG/-NOT. 1010 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1011 DagNotMatches.push_back(P); 1012 continue; 1013 } 1014 1015 // Okay, add the string we captured to the output vector and move on. 1016 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 1017 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 1018 DagNotMatches = ImplicitNegativeChecks; 1019 } 1020 1021 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 1022 // prefix as a filler for the error message. 1023 if (!DagNotMatches.empty()) { 1024 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 1025 SMLoc::getFromPointer(Buffer.data())); 1026 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 1027 } 1028 1029 if (CheckStrings.empty()) { 1030 errs() << "error: no check strings found with prefix" 1031 << (CheckPrefixes.size() > 1 ? "es " : " "); 1032 prefix_iterator I = CheckPrefixes.begin(); 1033 prefix_iterator E = CheckPrefixes.end(); 1034 if (I != E) { 1035 errs() << "\'" << *I << ":'"; 1036 ++I; 1037 } 1038 for (; I != E; ++I) 1039 errs() << ", \'" << *I << ":'"; 1040 1041 errs() << '\n'; 1042 return true; 1043 } 1044 1045 return false; 1046 } 1047 1048 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 1049 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 1050 StringRef Buffer, StringMap<StringRef> &VariableTable, 1051 size_t MatchPos, size_t MatchLen) { 1052 if (ExpectedMatch) { 1053 if (!Verbose) 1054 return; 1055 if (!VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) 1056 return; 1057 } 1058 SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos); 1059 SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen); 1060 SMRange MatchRange(MatchStart, MatchEnd); 1061 SM.PrintMessage( 1062 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, 1063 CheckTypeName(Prefix, Pat.getCheckTy()) + ": " + 1064 (ExpectedMatch ? "expected" : "excluded") + 1065 " string found in input"); 1066 SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange}); 1067 Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange); 1068 } 1069 1070 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 1071 const CheckString &CheckStr, StringRef Buffer, 1072 StringMap<StringRef> &VariableTable, size_t MatchPos, 1073 size_t MatchLen) { 1074 PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 1075 Buffer, VariableTable, MatchPos, MatchLen); 1076 } 1077 1078 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 1079 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 1080 StringRef Buffer, 1081 StringMap<StringRef> &VariableTable) { 1082 if (!ExpectedMatch && !VerboseVerbose) 1083 return; 1084 1085 // Otherwise, we have an error, emit an error message. 1086 SM.PrintMessage(Loc, 1087 ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, 1088 CheckTypeName(Prefix, Pat.getCheckTy()) + ": " + 1089 (ExpectedMatch ? "expected" : "excluded") + 1090 " string not found in input"); 1091 1092 // Print the "scanning from here" line. If the current position is at the 1093 // end of a line, advance to the start of the next line. 1094 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1095 1096 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1097 "scanning from here"); 1098 1099 // Allow the pattern to print additional information if desired. 1100 Pat.PrintVariableUses(SM, Buffer, VariableTable); 1101 if (ExpectedMatch) 1102 Pat.PrintFuzzyMatch(SM, Buffer, VariableTable); 1103 } 1104 1105 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 1106 const CheckString &CheckStr, StringRef Buffer, 1107 StringMap<StringRef> &VariableTable) { 1108 PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 1109 Buffer, VariableTable); 1110 } 1111 1112 /// Count the number of newlines in the specified range. 1113 static unsigned CountNumNewlinesBetween(StringRef Range, 1114 const char *&FirstNewLine) { 1115 unsigned NumNewLines = 0; 1116 while (1) { 1117 // Scan for newline. 1118 Range = Range.substr(Range.find_first_of("\n\r")); 1119 if (Range.empty()) 1120 return NumNewLines; 1121 1122 ++NumNewLines; 1123 1124 // Handle \n\r and \r\n as a single newline. 1125 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 1126 (Range[0] != Range[1])) 1127 Range = Range.substr(1); 1128 Range = Range.substr(1); 1129 1130 if (NumNewLines == 1) 1131 FirstNewLine = Range.begin(); 1132 } 1133 } 1134 1135 /// Match check string and its "not strings" and/or "dag strings". 1136 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1137 bool IsLabelScanMode, size_t &MatchLen, 1138 StringMap<StringRef> &VariableTable) const { 1139 size_t LastPos = 0; 1140 std::vector<const Pattern *> NotStrings; 1141 1142 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1143 // bounds; we have not processed variable definitions within the bounded block 1144 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1145 // over the block again (including the last CHECK-LABEL) in normal mode. 1146 if (!IsLabelScanMode) { 1147 // Match "dag strings" (with mixed "not strings" if any). 1148 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1149 if (LastPos == StringRef::npos) 1150 return StringRef::npos; 1151 } 1152 1153 // Match itself from the last position after matching CHECK-DAG. 1154 StringRef MatchBuffer = Buffer.substr(LastPos); 1155 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1156 if (MatchPos == StringRef::npos) { 1157 PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable); 1158 return StringRef::npos; 1159 } 1160 PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen); 1161 1162 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1163 // or CHECK-NOT 1164 if (!IsLabelScanMode) { 1165 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1166 1167 // If this check is a "CHECK-NEXT", verify that the previous match was on 1168 // the previous line (i.e. that there is one newline between them). 1169 if (CheckNext(SM, SkippedRegion)) 1170 return StringRef::npos; 1171 1172 // If this check is a "CHECK-SAME", verify that the previous match was on 1173 // the same line (i.e. that there is no newline between them). 1174 if (CheckSame(SM, SkippedRegion)) 1175 return StringRef::npos; 1176 1177 // If this match had "not strings", verify that they don't exist in the 1178 // skipped region. 1179 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1180 return StringRef::npos; 1181 } 1182 1183 return LastPos + MatchPos; 1184 } 1185 1186 /// Verify there is a single line in the given buffer. 1187 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1188 if (Pat.getCheckTy() != Check::CheckNext && 1189 Pat.getCheckTy() != Check::CheckEmpty) 1190 return false; 1191 1192 Twine CheckName = 1193 Prefix + 1194 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 1195 1196 // Count the number of newlines between the previous match and this one. 1197 assert(Buffer.data() != 1198 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1199 SMLoc::getFromPointer(Buffer.data()))) 1200 ->getBufferStart() && 1201 "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file"); 1202 1203 const char *FirstNewLine = nullptr; 1204 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1205 1206 if (NumNewLines == 0) { 1207 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1208 CheckName + ": is on the same line as previous match"); 1209 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1210 "'next' match was here"); 1211 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1212 "previous match ended here"); 1213 return true; 1214 } 1215 1216 if (NumNewLines != 1) { 1217 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1218 CheckName + 1219 ": is not on the line after the previous match"); 1220 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1221 "'next' match was here"); 1222 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1223 "previous match ended here"); 1224 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1225 "non-matching line after previous match is here"); 1226 return true; 1227 } 1228 1229 return false; 1230 } 1231 1232 /// Verify there is no newline in the given buffer. 1233 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1234 if (Pat.getCheckTy() != Check::CheckSame) 1235 return false; 1236 1237 // Count the number of newlines between the previous match and this one. 1238 assert(Buffer.data() != 1239 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1240 SMLoc::getFromPointer(Buffer.data()))) 1241 ->getBufferStart() && 1242 "CHECK-SAME can't be the first check in a file"); 1243 1244 const char *FirstNewLine = nullptr; 1245 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1246 1247 if (NumNewLines != 0) { 1248 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1249 Prefix + 1250 "-SAME: is not on the same line as the previous match"); 1251 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1252 "'next' match was here"); 1253 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1254 "previous match ended here"); 1255 return true; 1256 } 1257 1258 return false; 1259 } 1260 1261 /// Verify there's no "not strings" in the given buffer. 1262 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1263 const std::vector<const Pattern *> &NotStrings, 1264 StringMap<StringRef> &VariableTable) const { 1265 for (const Pattern *Pat : NotStrings) { 1266 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1267 1268 size_t MatchLen = 0; 1269 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1270 1271 if (Pos == StringRef::npos) { 1272 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, 1273 VariableTable); 1274 continue; 1275 } 1276 1277 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable, 1278 Pos, MatchLen); 1279 1280 return true; 1281 } 1282 1283 return false; 1284 } 1285 1286 /// Match "dag strings" and their mixed "not strings". 1287 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1288 std::vector<const Pattern *> &NotStrings, 1289 StringMap<StringRef> &VariableTable) const { 1290 if (DagNotStrings.empty()) 1291 return 0; 1292 1293 // The start of the search range. 1294 size_t StartPos = 0; 1295 1296 struct MatchRange { 1297 size_t Pos; 1298 size_t End; 1299 }; 1300 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match 1301 // ranges are erased from this list once they are no longer in the search 1302 // range. 1303 std::list<MatchRange> MatchRanges; 1304 1305 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG 1306 // group, so we don't use a range-based for loop here. 1307 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); 1308 PatItr != PatEnd; ++PatItr) { 1309 const Pattern &Pat = *PatItr; 1310 assert((Pat.getCheckTy() == Check::CheckDAG || 1311 Pat.getCheckTy() == Check::CheckNot) && 1312 "Invalid CHECK-DAG or CHECK-NOT!"); 1313 1314 if (Pat.getCheckTy() == Check::CheckNot) { 1315 NotStrings.push_back(&Pat); 1316 continue; 1317 } 1318 1319 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1320 1321 // CHECK-DAG always matches from the start. 1322 size_t MatchLen = 0, MatchPos = StartPos; 1323 1324 // Search for a match that doesn't overlap a previous match in this 1325 // CHECK-DAG group. 1326 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { 1327 StringRef MatchBuffer = Buffer.substr(MatchPos); 1328 size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1329 // With a group of CHECK-DAGs, a single mismatching means the match on 1330 // that group of CHECK-DAGs fails immediately. 1331 if (MatchPosBuf == StringRef::npos) { 1332 PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer, 1333 VariableTable); 1334 return StringRef::npos; 1335 } 1336 // Re-calc it as the offset relative to the start of the original string. 1337 MatchPos += MatchPosBuf; 1338 if (VerboseVerbose) 1339 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable, 1340 MatchPos, MatchLen); 1341 MatchRange M{MatchPos, MatchPos + MatchLen}; 1342 if (AllowDeprecatedDagOverlap) { 1343 // We don't need to track all matches in this mode, so we just maintain 1344 // one match range that encompasses the current CHECK-DAG group's 1345 // matches. 1346 if (MatchRanges.empty()) 1347 MatchRanges.insert(MatchRanges.end(), M); 1348 else { 1349 auto Block = MatchRanges.begin(); 1350 Block->Pos = std::min(Block->Pos, M.Pos); 1351 Block->End = std::max(Block->End, M.End); 1352 } 1353 break; 1354 } 1355 // Iterate previous matches until overlapping match or insertion point. 1356 bool Overlap = false; 1357 for (; MI != ME; ++MI) { 1358 if (M.Pos < MI->End) { 1359 // !Overlap => New match has no overlap and is before this old match. 1360 // Overlap => New match overlaps this old match. 1361 Overlap = MI->Pos < M.End; 1362 break; 1363 } 1364 } 1365 if (!Overlap) { 1366 // Insert non-overlapping match into list. 1367 MatchRanges.insert(MI, M); 1368 break; 1369 } 1370 if (VerboseVerbose) { 1371 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); 1372 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); 1373 SMRange OldRange(OldStart, OldEnd); 1374 SM.PrintMessage(OldStart, SourceMgr::DK_Note, 1375 "match discarded, overlaps earlier DAG match here", 1376 {OldRange}); 1377 } 1378 MatchPos = MI->End; 1379 } 1380 if (!VerboseVerbose) 1381 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable, 1382 MatchPos, MatchLen); 1383 1384 // Handle the end of a CHECK-DAG group. 1385 if (std::next(PatItr) == PatEnd || 1386 std::next(PatItr)->getCheckTy() == Check::CheckNot) { 1387 if (!NotStrings.empty()) { 1388 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to 1389 // CHECK-DAG, verify that there are no 'not' strings occurred in that 1390 // region. 1391 StringRef SkippedRegion = 1392 Buffer.slice(StartPos, MatchRanges.begin()->Pos); 1393 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1394 return StringRef::npos; 1395 // Clear "not strings". 1396 NotStrings.clear(); 1397 } 1398 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the 1399 // end of this CHECK-DAG group's match range. 1400 StartPos = MatchRanges.rbegin()->End; 1401 // Don't waste time checking for (impossible) overlaps before that. 1402 MatchRanges.clear(); 1403 } 1404 } 1405 1406 return StartPos; 1407 } 1408 1409 // A check prefix must contain only alphanumeric, hyphens and underscores. 1410 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1411 Regex Validator("^[a-zA-Z0-9_-]*$"); 1412 return Validator.match(CheckPrefix); 1413 } 1414 1415 static bool ValidateCheckPrefixes() { 1416 StringSet<> PrefixSet; 1417 1418 for (StringRef Prefix : CheckPrefixes) { 1419 // Reject empty prefixes. 1420 if (Prefix == "") 1421 return false; 1422 1423 if (!PrefixSet.insert(Prefix).second) 1424 return false; 1425 1426 if (!ValidateCheckPrefix(Prefix)) 1427 return false; 1428 } 1429 1430 return true; 1431 } 1432 1433 // Combines the check prefixes into a single regex so that we can efficiently 1434 // scan for any of the set. 1435 // 1436 // The semantics are that the longest-match wins which matches our regex 1437 // library. 1438 static Regex buildCheckPrefixRegex() { 1439 // I don't think there's a way to specify an initial value for cl::list, 1440 // so if nothing was specified, add the default 1441 if (CheckPrefixes.empty()) 1442 CheckPrefixes.push_back("CHECK"); 1443 1444 // We already validated the contents of CheckPrefixes so just concatenate 1445 // them as alternatives. 1446 SmallString<32> PrefixRegexStr; 1447 for (StringRef Prefix : CheckPrefixes) { 1448 if (Prefix != CheckPrefixes.front()) 1449 PrefixRegexStr.push_back('|'); 1450 1451 PrefixRegexStr.append(Prefix); 1452 } 1453 1454 return Regex(PrefixRegexStr); 1455 } 1456 1457 static void DumpCommandLine(int argc, char **argv) { 1458 errs() << "FileCheck command line: "; 1459 for (int I = 0; I < argc; I++) 1460 errs() << " " << argv[I]; 1461 errs() << "\n"; 1462 } 1463 1464 // Remove local variables from \p VariableTable. Global variables 1465 // (start with '$') are preserved. 1466 static void ClearLocalVars(StringMap<StringRef> &VariableTable) { 1467 SmallVector<StringRef, 16> LocalVars; 1468 for (const auto &Var : VariableTable) 1469 if (Var.first()[0] != '$') 1470 LocalVars.push_back(Var.first()); 1471 1472 for (const auto &Var : LocalVars) 1473 VariableTable.erase(Var); 1474 } 1475 1476 /// Check the input to FileCheck provided in the \p Buffer against the \p 1477 /// CheckStrings read from the check file. 1478 /// 1479 /// Returns false if the input fails to satisfy the checks. 1480 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1481 ArrayRef<CheckString> CheckStrings) { 1482 bool ChecksFailed = false; 1483 1484 /// VariableTable - This holds all the current filecheck variables. 1485 StringMap<StringRef> VariableTable; 1486 1487 for (const auto& Def : GlobalDefines) 1488 VariableTable.insert(StringRef(Def).split('=')); 1489 1490 unsigned i = 0, j = 0, e = CheckStrings.size(); 1491 while (true) { 1492 StringRef CheckRegion; 1493 if (j == e) { 1494 CheckRegion = Buffer; 1495 } else { 1496 const CheckString &CheckLabelStr = CheckStrings[j]; 1497 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1498 ++j; 1499 continue; 1500 } 1501 1502 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1503 size_t MatchLabelLen = 0; 1504 size_t MatchLabelPos = 1505 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1506 if (MatchLabelPos == StringRef::npos) 1507 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1508 return false; 1509 1510 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1511 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1512 ++j; 1513 } 1514 1515 if (EnableVarScope) 1516 ClearLocalVars(VariableTable); 1517 1518 for (; i != j; ++i) { 1519 const CheckString &CheckStr = CheckStrings[i]; 1520 1521 // Check each string within the scanned region, including a second check 1522 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1523 size_t MatchLen = 0; 1524 size_t MatchPos = 1525 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1526 1527 if (MatchPos == StringRef::npos) { 1528 ChecksFailed = true; 1529 i = j; 1530 break; 1531 } 1532 1533 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1534 } 1535 1536 if (j == e) 1537 break; 1538 } 1539 1540 // Success if no checks failed. 1541 return !ChecksFailed; 1542 } 1543 1544 int main(int argc, char **argv) { 1545 InitLLVM X(argc, argv); 1546 cl::ParseCommandLineOptions(argc, argv); 1547 1548 if (!ValidateCheckPrefixes()) { 1549 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1550 "start with a letter and contain only alphanumeric characters, " 1551 "hyphens and underscores\n"; 1552 return 2; 1553 } 1554 1555 Regex PrefixRE = buildCheckPrefixRegex(); 1556 std::string REError; 1557 if (!PrefixRE.isValid(REError)) { 1558 errs() << "Unable to combine check-prefix strings into a prefix regular " 1559 "expression! This is likely a bug in FileCheck's verification of " 1560 "the check-prefix strings. Regular expression parsing failed " 1561 "with the following error: " 1562 << REError << "\n"; 1563 return 2; 1564 } 1565 1566 if (VerboseVerbose) 1567 Verbose = true; 1568 1569 SourceMgr SM; 1570 1571 // Read the expected strings from the check file. 1572 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1573 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1574 if (std::error_code EC = CheckFileOrErr.getError()) { 1575 errs() << "Could not open check file '" << CheckFilename 1576 << "': " << EC.message() << '\n'; 1577 return 2; 1578 } 1579 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1580 1581 SmallString<4096> CheckFileBuffer; 1582 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1583 1584 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1585 CheckFileText, CheckFile.getBufferIdentifier()), 1586 SMLoc()); 1587 1588 std::vector<CheckString> CheckStrings; 1589 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1590 return 2; 1591 1592 // Open the file to check and add it to SourceMgr. 1593 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1594 MemoryBuffer::getFileOrSTDIN(InputFilename); 1595 if (std::error_code EC = InputFileOrErr.getError()) { 1596 errs() << "Could not open input file '" << InputFilename 1597 << "': " << EC.message() << '\n'; 1598 return 2; 1599 } 1600 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1601 1602 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1603 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1604 DumpCommandLine(argc, argv); 1605 return 2; 1606 } 1607 1608 SmallString<4096> InputFileBuffer; 1609 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1610 1611 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1612 InputFileText, InputFile.getBufferIdentifier()), 1613 SMLoc()); 1614 1615 int ExitCode = CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1616 if (ExitCode == 1 && DumpInputOnFailure) 1617 errs() << "Full input was:\n<<<<<<\n" << InputFileText << "\n>>>>>>\n"; 1618 1619 return ExitCode; 1620 } 1621