1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines APIs for analyzing the format strings of printf, fscanf, 11 // and friends. 12 // 13 // The structure of format strings for fprintf are described in C99 7.19.6.1. 14 // 15 // The structure of format strings for fscanf are described in C99 7.19.6.2. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H 20 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H 21 22 #include "clang/AST/CanonicalType.h" 23 24 namespace clang { 25 26 class TargetInfo; 27 28 //===----------------------------------------------------------------------===// 29 /// Common components of both fprintf and fscanf format strings. 30 namespace analyze_format_string { 31 32 /// Class representing optional flags with location and representation 33 /// information. 34 class OptionalFlag { 35 public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 flag = true; 44 this->position = position; 45 } 46 const char *getPosition() const { 47 assert(position); 48 return position; 49 } 50 const char *toString() const { return representation; } 51 52 // Overloaded operators for bool like qualities 53 explicit operator bool() const { return flag; } 54 OptionalFlag& operator=(const bool &rhs) { 55 flag = rhs; 56 return *this; // Return a reference to myself. 57 } 58 private: 59 const char *representation; 60 const char *position; 61 bool flag; 62 }; 63 64 /// Represents the length modifier in a format string in scanf/printf. 65 class LengthModifier { 66 public: 67 enum Kind { 68 None, 69 AsChar, // 'hh' 70 AsShort, // 'h' 71 AsLong, // 'l' 72 AsLongLong, // 'll' 73 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 74 AsIntMax, // 'j' 75 AsSizeT, // 'z' 76 AsPtrDiff, // 't' 77 AsInt32, // 'I32' (MSVCRT, like __int32) 78 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL) 79 AsInt64, // 'I64' (MSVCRT, like __int64) 80 AsLongDouble, // 'L' 81 AsAllocate, // for '%as', GNU extension to C90 scanf 82 AsMAllocate, // for '%ms', GNU extension to scanf 83 AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z 84 AsWideChar = AsLong // for '%ls', only makes sense for printf 85 }; 86 87 LengthModifier() 88 : Position(nullptr), kind(None) {} 89 LengthModifier(const char *pos, Kind k) 90 : Position(pos), kind(k) {} 91 92 const char *getStart() const { 93 return Position; 94 } 95 96 unsigned getLength() const { 97 switch (kind) { 98 default: 99 return 1; 100 case AsLongLong: 101 case AsChar: 102 return 2; 103 case AsInt32: 104 case AsInt64: 105 return 3; 106 case None: 107 return 0; 108 } 109 } 110 111 Kind getKind() const { return kind; } 112 void setKind(Kind k) { kind = k; } 113 114 const char *toString() const; 115 116 private: 117 const char *Position; 118 Kind kind; 119 }; 120 121 class ConversionSpecifier { 122 public: 123 enum Kind { 124 InvalidSpecifier = 0, 125 // C99 conversion specifiers. 126 cArg, 127 dArg, 128 DArg, // Apple extension 129 iArg, 130 IntArgBeg = dArg, IntArgEnd = iArg, 131 132 oArg, 133 OArg, // Apple extension 134 uArg, 135 UArg, // Apple extension 136 xArg, 137 XArg, 138 UIntArgBeg = oArg, UIntArgEnd = XArg, 139 140 fArg, 141 FArg, 142 eArg, 143 EArg, 144 gArg, 145 GArg, 146 aArg, 147 AArg, 148 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 149 150 sArg, 151 pArg, 152 nArg, 153 PercentArg, 154 CArg, 155 SArg, 156 157 // ** Printf-specific ** 158 159 ZArg, // MS extension 160 161 // Objective-C specific specifiers. 162 ObjCObjArg, // '@' 163 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 164 165 // FreeBSD kernel specific specifiers. 166 FreeBSDbArg, 167 FreeBSDDArg, 168 FreeBSDrArg, 169 FreeBSDyArg, 170 171 // GlibC specific specifiers. 172 PrintErrno, // 'm' 173 174 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 175 176 // ** Scanf-specific ** 177 ScanListArg, // '[' 178 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 179 }; 180 181 ConversionSpecifier(bool isPrintf = true) 182 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr), 183 kind(InvalidSpecifier) {} 184 185 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 186 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {} 187 188 const char *getStart() const { 189 return Position; 190 } 191 192 StringRef getCharacters() const { 193 return StringRef(getStart(), getLength()); 194 } 195 196 bool consumesDataArgument() const { 197 switch (kind) { 198 case PrintErrno: 199 assert(IsPrintf); 200 return false; 201 case PercentArg: 202 return false; 203 default: 204 return true; 205 } 206 } 207 208 Kind getKind() const { return kind; } 209 void setKind(Kind k) { kind = k; } 210 unsigned getLength() const { 211 return EndScanList ? EndScanList - Position : 1; 212 } 213 214 bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) || 215 kind == FreeBSDrArg || kind == FreeBSDyArg; } 216 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 217 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 218 const char *toString() const; 219 220 bool isPrintfKind() const { return IsPrintf; } 221 222 Optional<ConversionSpecifier> getStandardSpecifier() const; 223 224 protected: 225 bool IsPrintf; 226 const char *Position; 227 const char *EndScanList; 228 Kind kind; 229 }; 230 231 class ArgType { 232 public: 233 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 234 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 235 236 enum MatchKind { NoMatch = 0, Match = 1, NoMatchPedantic }; 237 238 private: 239 const Kind K; 240 QualType T; 241 const char *Name; 242 bool Ptr; 243 public: 244 ArgType(Kind k = UnknownTy, const char *n = nullptr) 245 : K(k), Name(n), Ptr(false) {} 246 ArgType(QualType t, const char *n = nullptr) 247 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 248 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {} 249 250 static ArgType Invalid() { return ArgType(InvalidTy); } 251 bool isValid() const { return K != InvalidTy; } 252 253 /// Create an ArgType which corresponds to the type pointer to A. 254 static ArgType PtrTo(const ArgType& A) { 255 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 256 ArgType Res = A; 257 Res.Ptr = true; 258 return Res; 259 } 260 261 MatchKind matchesType(ASTContext &C, QualType argTy) const; 262 263 QualType getRepresentativeType(ASTContext &C) const; 264 265 std::string getRepresentativeTypeName(ASTContext &C) const; 266 }; 267 268 class OptionalAmount { 269 public: 270 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 271 272 OptionalAmount(HowSpecified howSpecified, 273 unsigned amount, 274 const char *amountStart, 275 unsigned amountLength, 276 bool usesPositionalArg) 277 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 278 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 279 280 OptionalAmount(bool valid = true) 281 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 282 UsesPositionalArg(0), UsesDotPrefix(0) {} 283 284 bool isInvalid() const { 285 return hs == Invalid; 286 } 287 288 HowSpecified getHowSpecified() const { return hs; } 289 void setHowSpecified(HowSpecified h) { hs = h; } 290 291 bool hasDataArgument() const { return hs == Arg; } 292 293 unsigned getArgIndex() const { 294 assert(hasDataArgument()); 295 return amt; 296 } 297 298 unsigned getConstantAmount() const { 299 assert(hs == Constant); 300 return amt; 301 } 302 303 const char *getStart() const { 304 // We include the . character if it is given. 305 return start - UsesDotPrefix; 306 } 307 308 unsigned getConstantLength() const { 309 assert(hs == Constant); 310 return length + UsesDotPrefix; 311 } 312 313 ArgType getArgType(ASTContext &Ctx) const; 314 315 void toString(raw_ostream &os) const; 316 317 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 318 unsigned getPositionalArgIndex() const { 319 assert(hasDataArgument()); 320 return amt + 1; 321 } 322 323 bool usesDotPrefix() const { return UsesDotPrefix; } 324 void setUsesDotPrefix() { UsesDotPrefix = true; } 325 326 private: 327 const char *start; 328 unsigned length; 329 HowSpecified hs; 330 unsigned amt; 331 bool UsesPositionalArg : 1; 332 bool UsesDotPrefix; 333 }; 334 335 336 class FormatSpecifier { 337 protected: 338 LengthModifier LM; 339 OptionalAmount FieldWidth; 340 ConversionSpecifier CS; 341 /// Positional arguments, an IEEE extension: 342 /// IEEE Std 1003.1, 2004 Edition 343 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 344 bool UsesPositionalArg; 345 unsigned argIndex; 346 public: 347 FormatSpecifier(bool isPrintf) 348 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 349 350 void setLengthModifier(LengthModifier lm) { 351 LM = lm; 352 } 353 354 void setUsesPositionalArg() { UsesPositionalArg = true; } 355 356 void setArgIndex(unsigned i) { 357 argIndex = i; 358 } 359 360 unsigned getArgIndex() const { 361 return argIndex; 362 } 363 364 unsigned getPositionalArgIndex() const { 365 return argIndex + 1; 366 } 367 368 const LengthModifier &getLengthModifier() const { 369 return LM; 370 } 371 372 const OptionalAmount &getFieldWidth() const { 373 return FieldWidth; 374 } 375 376 void setFieldWidth(const OptionalAmount &Amt) { 377 FieldWidth = Amt; 378 } 379 380 bool usesPositionalArg() const { return UsesPositionalArg; } 381 382 bool hasValidLengthModifier(const TargetInfo &Target) const; 383 384 bool hasStandardLengthModifier() const; 385 386 Optional<LengthModifier> getCorrectedLengthModifier() const; 387 388 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 389 390 bool hasStandardLengthConversionCombination() const; 391 392 /// For a TypedefType QT, if it is a named integer type such as size_t, 393 /// assign the appropriate value to LM and return true. 394 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 395 }; 396 397 } // end analyze_format_string namespace 398 399 //===----------------------------------------------------------------------===// 400 /// Pieces specific to fprintf format strings. 401 402 namespace analyze_printf { 403 404 class PrintfConversionSpecifier : 405 public analyze_format_string::ConversionSpecifier { 406 public: 407 PrintfConversionSpecifier() 408 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {} 409 410 PrintfConversionSpecifier(const char *pos, Kind k) 411 : ConversionSpecifier(true, pos, k) {} 412 413 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 414 bool isDoubleArg() const { return kind >= DoubleArgBeg && 415 kind <= DoubleArgEnd; } 416 unsigned getLength() const { 417 // Conversion specifiers currently only are represented by 418 // single characters, but we be flexible. 419 return 1; 420 } 421 422 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 423 return CS->isPrintfKind(); 424 } 425 }; 426 427 using analyze_format_string::ArgType; 428 using analyze_format_string::LengthModifier; 429 using analyze_format_string::OptionalAmount; 430 using analyze_format_string::OptionalFlag; 431 432 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 433 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 434 OptionalFlag IsLeftJustified; // '-' 435 OptionalFlag HasPlusPrefix; // '+' 436 OptionalFlag HasSpacePrefix; // ' ' 437 OptionalFlag HasAlternativeForm; // '#' 438 OptionalFlag HasLeadingZeroes; // '0' 439 OptionalFlag HasObjCTechnicalTerm; // '[tt]' 440 OptionalAmount Precision; 441 public: 442 PrintfSpecifier() : 443 FormatSpecifier(/* isPrintf = */ true), 444 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 445 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0"), 446 HasObjCTechnicalTerm("tt") {} 447 448 static PrintfSpecifier Parse(const char *beg, const char *end); 449 450 // Methods for incrementally constructing the PrintfSpecifier. 451 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 452 CS = cs; 453 } 454 void setHasThousandsGrouping(const char *position) { 455 HasThousandsGrouping.setPosition(position); 456 } 457 void setIsLeftJustified(const char *position) { 458 IsLeftJustified.setPosition(position); 459 } 460 void setHasPlusPrefix(const char *position) { 461 HasPlusPrefix.setPosition(position); 462 } 463 void setHasSpacePrefix(const char *position) { 464 HasSpacePrefix.setPosition(position); 465 } 466 void setHasAlternativeForm(const char *position) { 467 HasAlternativeForm.setPosition(position); 468 } 469 void setHasLeadingZeros(const char *position) { 470 HasLeadingZeroes.setPosition(position); 471 } 472 void setHasObjCTechnicalTerm(const char *position) { 473 HasObjCTechnicalTerm.setPosition(position); 474 } 475 void setUsesPositionalArg() { UsesPositionalArg = true; } 476 477 // Methods for querying the format specifier. 478 479 const PrintfConversionSpecifier &getConversionSpecifier() const { 480 return cast<PrintfConversionSpecifier>(CS); 481 } 482 483 void setPrecision(const OptionalAmount &Amt) { 484 Precision = Amt; 485 Precision.setUsesDotPrefix(); 486 } 487 488 const OptionalAmount &getPrecision() const { 489 return Precision; 490 } 491 492 bool consumesDataArgument() const { 493 return getConversionSpecifier().consumesDataArgument(); 494 } 495 496 /// \brief Returns the builtin type that a data argument 497 /// paired with this format specifier should have. This method 498 /// will return null if the format specifier does not have 499 /// a matching data argument or the matching argument matches 500 /// more than one type. 501 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 502 503 const OptionalFlag &hasThousandsGrouping() const { 504 return HasThousandsGrouping; 505 } 506 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 507 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 508 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 509 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 510 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 511 const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; } 512 bool usesPositionalArg() const { return UsesPositionalArg; } 513 514 /// Changes the specifier and length according to a QualType, retaining any 515 /// flags or options. Returns true on success, or false when a conversion 516 /// was not successful. 517 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 518 bool IsObjCLiteral); 519 520 void toString(raw_ostream &os) const; 521 522 // Validation methods - to check if any element results in undefined behavior 523 bool hasValidPlusPrefix() const; 524 bool hasValidAlternativeForm() const; 525 bool hasValidLeadingZeros() const; 526 bool hasValidSpacePrefix() const; 527 bool hasValidLeftJustified() const; 528 bool hasValidThousandsGroupingPrefix() const; 529 530 bool hasValidPrecision() const; 531 bool hasValidFieldWidth() const; 532 }; 533 } // end analyze_printf namespace 534 535 //===----------------------------------------------------------------------===// 536 /// Pieces specific to fscanf format strings. 537 538 namespace analyze_scanf { 539 540 class ScanfConversionSpecifier : 541 public analyze_format_string::ConversionSpecifier { 542 public: 543 ScanfConversionSpecifier() 544 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {} 545 546 ScanfConversionSpecifier(const char *pos, Kind k) 547 : ConversionSpecifier(false, pos, k) {} 548 549 void setEndScanList(const char *pos) { EndScanList = pos; } 550 551 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 552 return !CS->isPrintfKind(); 553 } 554 }; 555 556 using analyze_format_string::ArgType; 557 using analyze_format_string::LengthModifier; 558 using analyze_format_string::OptionalAmount; 559 using analyze_format_string::OptionalFlag; 560 561 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 562 OptionalFlag SuppressAssignment; // '*' 563 public: 564 ScanfSpecifier() : 565 FormatSpecifier(/* isPrintf = */ false), 566 SuppressAssignment("*") {} 567 568 void setSuppressAssignment(const char *position) { 569 SuppressAssignment.setPosition(position); 570 } 571 572 const OptionalFlag &getSuppressAssignment() const { 573 return SuppressAssignment; 574 } 575 576 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 577 CS = cs; 578 } 579 580 const ScanfConversionSpecifier &getConversionSpecifier() const { 581 return cast<ScanfConversionSpecifier>(CS); 582 } 583 584 bool consumesDataArgument() const { 585 return CS.consumesDataArgument() && !SuppressAssignment; 586 } 587 588 ArgType getArgType(ASTContext &Ctx) const; 589 590 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt, 591 ASTContext &Ctx); 592 593 void toString(raw_ostream &os) const; 594 595 static ScanfSpecifier Parse(const char *beg, const char *end); 596 }; 597 598 } // end analyze_scanf namespace 599 600 //===----------------------------------------------------------------------===// 601 // Parsing and processing of format strings (both fprintf and fscanf). 602 603 namespace analyze_format_string { 604 605 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 606 607 class FormatStringHandler { 608 public: 609 FormatStringHandler() {} 610 virtual ~FormatStringHandler(); 611 612 virtual void HandleNullChar(const char *nullCharacter) {} 613 614 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 615 616 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 617 PositionContext p) {} 618 619 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 620 621 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 622 unsigned specifierLen) {} 623 624 virtual void HandleEmptyObjCModifierFlag(const char *startFlags, 625 unsigned flagsLen) {} 626 627 virtual void HandleInvalidObjCModifierFlag(const char *startFlag, 628 unsigned flagLen) {} 629 630 virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart, 631 const char *flagsEnd, 632 const char *conversionPosition) {} 633 // Printf-specific handlers. 634 635 virtual bool HandleInvalidPrintfConversionSpecifier( 636 const analyze_printf::PrintfSpecifier &FS, 637 const char *startSpecifier, 638 unsigned specifierLen) { 639 return true; 640 } 641 642 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 643 const char *startSpecifier, 644 unsigned specifierLen) { 645 return true; 646 } 647 648 // Scanf-specific handlers. 649 650 virtual bool HandleInvalidScanfConversionSpecifier( 651 const analyze_scanf::ScanfSpecifier &FS, 652 const char *startSpecifier, 653 unsigned specifierLen) { 654 return true; 655 } 656 657 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 658 const char *startSpecifier, 659 unsigned specifierLen) { 660 return true; 661 } 662 663 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 664 }; 665 666 bool ParsePrintfString(FormatStringHandler &H, 667 const char *beg, const char *end, const LangOptions &LO, 668 const TargetInfo &Target, bool isFreeBSDKPrintf); 669 670 bool ParseFormatStringHasSArg(const char *beg, const char *end, 671 const LangOptions &LO, const TargetInfo &Target); 672 673 bool ParseScanfString(FormatStringHandler &H, 674 const char *beg, const char *end, const LangOptions &LO, 675 const TargetInfo &Target); 676 677 } // end analyze_format_string namespace 678 } // end clang namespace 679 #endif 680