1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines APIs for analyzing the format strings of printf, fscanf, 11 // and friends. 12 // 13 // The structure of format strings for fprintf are described in C99 7.19.6.1. 14 // 15 // The structure of format strings for fscanf are described in C99 7.19.6.2. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H 20 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H 21 22 #include "clang/AST/CanonicalType.h" 23 24 namespace clang { 25 26 class TargetInfo; 27 28 //===----------------------------------------------------------------------===// 29 /// Common components of both fprintf and fscanf format strings. 30 namespace analyze_format_string { 31 32 /// Class representing optional flags with location and representation 33 /// information. 34 class OptionalFlag { 35 public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() const { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 flag = true; 44 this->position = position; 45 } 46 const char *getPosition() const { 47 assert(position); 48 return position; 49 } 50 const char *toString() const { return representation; } 51 52 // Overloaded operators for bool like qualities 53 explicit operator bool() const { return flag; } 54 OptionalFlag& operator=(const bool &rhs) { 55 flag = rhs; 56 return *this; // Return a reference to myself. 57 } 58 private: 59 const char *representation; 60 const char *position; 61 bool flag; 62 }; 63 64 /// Represents the length modifier in a format string in scanf/printf. 65 class LengthModifier { 66 public: 67 enum Kind { 68 None, 69 AsChar, // 'hh' 70 AsShort, // 'h' 71 AsLong, // 'l' 72 AsLongLong, // 'll' 73 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 74 AsIntMax, // 'j' 75 AsSizeT, // 'z' 76 AsPtrDiff, // 't' 77 AsInt32, // 'I32' (MSVCRT, like __int32) 78 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL) 79 AsInt64, // 'I64' (MSVCRT, like __int64) 80 AsLongDouble, // 'L' 81 AsAllocate, // for '%as', GNU extension to C90 scanf 82 AsMAllocate, // for '%ms', GNU extension to scanf 83 AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z 84 AsWideChar = AsLong // for '%ls', only makes sense for printf 85 }; 86 87 LengthModifier() 88 : Position(nullptr), kind(None) {} 89 LengthModifier(const char *pos, Kind k) 90 : Position(pos), kind(k) {} 91 92 const char *getStart() const { 93 return Position; 94 } 95 96 unsigned getLength() const { 97 switch (kind) { 98 default: 99 return 1; 100 case AsLongLong: 101 case AsChar: 102 return 2; 103 case AsInt32: 104 case AsInt64: 105 return 3; 106 case None: 107 return 0; 108 } 109 } 110 111 Kind getKind() const { return kind; } 112 void setKind(Kind k) { kind = k; } 113 114 const char *toString() const; 115 116 private: 117 const char *Position; 118 Kind kind; 119 }; 120 121 class ConversionSpecifier { 122 public: 123 enum Kind { 124 InvalidSpecifier = 0, 125 // C99 conversion specifiers. 126 cArg, 127 dArg, 128 DArg, // Apple extension 129 iArg, 130 IntArgBeg = dArg, 131 IntArgEnd = iArg, 132 133 oArg, 134 OArg, // Apple extension 135 uArg, 136 UArg, // Apple extension 137 xArg, 138 XArg, 139 UIntArgBeg = oArg, 140 UIntArgEnd = XArg, 141 142 fArg, 143 FArg, 144 eArg, 145 EArg, 146 gArg, 147 GArg, 148 aArg, 149 AArg, 150 DoubleArgBeg = fArg, 151 DoubleArgEnd = AArg, 152 153 sArg, 154 pArg, 155 nArg, 156 PercentArg, 157 CArg, 158 SArg, 159 160 // Apple extension: P specifies to os_log that the data being pointed to is 161 // to be copied by os_log. The precision indicates the number of bytes to 162 // copy. 163 PArg, 164 165 // ** Printf-specific ** 166 167 ZArg, // MS extension 168 169 // Objective-C specific specifiers. 170 ObjCObjArg, // '@' 171 ObjCBeg = ObjCObjArg, 172 ObjCEnd = ObjCObjArg, 173 174 // FreeBSD kernel specific specifiers. 175 FreeBSDbArg, 176 FreeBSDDArg, 177 FreeBSDrArg, 178 FreeBSDyArg, 179 180 // GlibC specific specifiers. 181 PrintErrno, // 'm' 182 183 PrintfConvBeg = ObjCObjArg, 184 PrintfConvEnd = PrintErrno, 185 186 // ** Scanf-specific ** 187 ScanListArg, // '[' 188 ScanfConvBeg = ScanListArg, 189 ScanfConvEnd = ScanListArg 190 }; 191 192 ConversionSpecifier(bool isPrintf = true) 193 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr), 194 kind(InvalidSpecifier) {} 195 196 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 197 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {} 198 199 const char *getStart() const { 200 return Position; 201 } 202 203 StringRef getCharacters() const { 204 return StringRef(getStart(), getLength()); 205 } 206 207 bool consumesDataArgument() const { 208 switch (kind) { 209 case PrintErrno: 210 assert(IsPrintf); 211 return false; 212 case PercentArg: 213 return false; 214 case InvalidSpecifier: 215 return false; 216 default: 217 return true; 218 } 219 } 220 221 Kind getKind() const { return kind; } 222 void setKind(Kind k) { kind = k; } 223 unsigned getLength() const { 224 return EndScanList ? EndScanList - Position : 1; 225 } 226 void setEndScanList(const char *pos) { EndScanList = pos; } 227 228 bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) || 229 kind == FreeBSDrArg || kind == FreeBSDyArg; } 230 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 231 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 232 bool isDoubleArg() const { 233 return kind >= DoubleArgBeg && kind <= DoubleArgEnd; 234 } 235 236 const char *toString() const; 237 238 bool isPrintfKind() const { return IsPrintf; } 239 240 Optional<ConversionSpecifier> getStandardSpecifier() const; 241 242 protected: 243 bool IsPrintf; 244 const char *Position; 245 const char *EndScanList; 246 Kind kind; 247 }; 248 249 class ArgType { 250 public: 251 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 252 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 253 254 enum MatchKind { NoMatch = 0, Match = 1, NoMatchPedantic }; 255 256 private: 257 const Kind K; 258 QualType T; 259 const char *Name; 260 bool Ptr; 261 public: 262 ArgType(Kind k = UnknownTy, const char *n = nullptr) 263 : K(k), Name(n), Ptr(false) {} 264 ArgType(QualType t, const char *n = nullptr) 265 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 266 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {} 267 268 static ArgType Invalid() { return ArgType(InvalidTy); } 269 bool isValid() const { return K != InvalidTy; } 270 271 /// Create an ArgType which corresponds to the type pointer to A. 272 static ArgType PtrTo(const ArgType& A) { 273 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 274 ArgType Res = A; 275 Res.Ptr = true; 276 return Res; 277 } 278 279 MatchKind matchesType(ASTContext &C, QualType argTy) const; 280 281 QualType getRepresentativeType(ASTContext &C) const; 282 283 std::string getRepresentativeTypeName(ASTContext &C) const; 284 }; 285 286 class OptionalAmount { 287 public: 288 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 289 290 OptionalAmount(HowSpecified howSpecified, 291 unsigned amount, 292 const char *amountStart, 293 unsigned amountLength, 294 bool usesPositionalArg) 295 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 296 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 297 298 OptionalAmount(bool valid = true) 299 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 300 UsesPositionalArg(0), UsesDotPrefix(0) {} 301 302 bool isInvalid() const { 303 return hs == Invalid; 304 } 305 306 HowSpecified getHowSpecified() const { return hs; } 307 void setHowSpecified(HowSpecified h) { hs = h; } 308 309 bool hasDataArgument() const { return hs == Arg; } 310 311 unsigned getArgIndex() const { 312 assert(hasDataArgument()); 313 return amt; 314 } 315 316 unsigned getConstantAmount() const { 317 assert(hs == Constant); 318 return amt; 319 } 320 321 const char *getStart() const { 322 // We include the . character if it is given. 323 return start - UsesDotPrefix; 324 } 325 326 unsigned getConstantLength() const { 327 assert(hs == Constant); 328 return length + UsesDotPrefix; 329 } 330 331 ArgType getArgType(ASTContext &Ctx) const; 332 333 void toString(raw_ostream &os) const; 334 335 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 336 unsigned getPositionalArgIndex() const { 337 assert(hasDataArgument()); 338 return amt + 1; 339 } 340 341 bool usesDotPrefix() const { return UsesDotPrefix; } 342 void setUsesDotPrefix() { UsesDotPrefix = true; } 343 344 private: 345 const char *start; 346 unsigned length; 347 HowSpecified hs; 348 unsigned amt; 349 bool UsesPositionalArg : 1; 350 bool UsesDotPrefix; 351 }; 352 353 354 class FormatSpecifier { 355 protected: 356 LengthModifier LM; 357 OptionalAmount FieldWidth; 358 ConversionSpecifier CS; 359 /// Positional arguments, an IEEE extension: 360 /// IEEE Std 1003.1, 2004 Edition 361 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 362 bool UsesPositionalArg; 363 unsigned argIndex; 364 public: 365 FormatSpecifier(bool isPrintf) 366 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 367 368 void setLengthModifier(LengthModifier lm) { 369 LM = lm; 370 } 371 372 void setUsesPositionalArg() { UsesPositionalArg = true; } 373 374 void setArgIndex(unsigned i) { 375 argIndex = i; 376 } 377 378 unsigned getArgIndex() const { 379 return argIndex; 380 } 381 382 unsigned getPositionalArgIndex() const { 383 return argIndex + 1; 384 } 385 386 const LengthModifier &getLengthModifier() const { 387 return LM; 388 } 389 390 const OptionalAmount &getFieldWidth() const { 391 return FieldWidth; 392 } 393 394 void setFieldWidth(const OptionalAmount &Amt) { 395 FieldWidth = Amt; 396 } 397 398 bool usesPositionalArg() const { return UsesPositionalArg; } 399 400 bool hasValidLengthModifier(const TargetInfo &Target) const; 401 402 bool hasStandardLengthModifier() const; 403 404 Optional<LengthModifier> getCorrectedLengthModifier() const; 405 406 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 407 408 bool hasStandardLengthConversionCombination() const; 409 410 /// For a TypedefType QT, if it is a named integer type such as size_t, 411 /// assign the appropriate value to LM and return true. 412 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 413 }; 414 415 } // end analyze_format_string namespace 416 417 //===----------------------------------------------------------------------===// 418 /// Pieces specific to fprintf format strings. 419 420 namespace analyze_printf { 421 422 class PrintfConversionSpecifier : 423 public analyze_format_string::ConversionSpecifier { 424 public: 425 PrintfConversionSpecifier() 426 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {} 427 428 PrintfConversionSpecifier(const char *pos, Kind k) 429 : ConversionSpecifier(true, pos, k) {} 430 431 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 432 bool isDoubleArg() const { return kind >= DoubleArgBeg && 433 kind <= DoubleArgEnd; } 434 435 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 436 return CS->isPrintfKind(); 437 } 438 }; 439 440 using analyze_format_string::ArgType; 441 using analyze_format_string::LengthModifier; 442 using analyze_format_string::OptionalAmount; 443 using analyze_format_string::OptionalFlag; 444 445 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 446 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 447 OptionalFlag IsLeftJustified; // '-' 448 OptionalFlag HasPlusPrefix; // '+' 449 OptionalFlag HasSpacePrefix; // ' ' 450 OptionalFlag HasAlternativeForm; // '#' 451 OptionalFlag HasLeadingZeroes; // '0' 452 OptionalFlag HasObjCTechnicalTerm; // '[tt]' 453 OptionalFlag IsPrivate; // '{private}' 454 OptionalFlag IsPublic; // '{public}' 455 OptionalAmount Precision; 456 public: 457 PrintfSpecifier() 458 : FormatSpecifier(/* isPrintf = */ true), HasThousandsGrouping("'"), 459 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "), 460 HasAlternativeForm("#"), HasLeadingZeroes("0"), 461 HasObjCTechnicalTerm("tt"), IsPrivate("private"), IsPublic("public") {} 462 463 static PrintfSpecifier Parse(const char *beg, const char *end); 464 465 // Methods for incrementally constructing the PrintfSpecifier. 466 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 467 CS = cs; 468 } 469 void setHasThousandsGrouping(const char *position) { 470 HasThousandsGrouping.setPosition(position); 471 } 472 void setIsLeftJustified(const char *position) { 473 IsLeftJustified.setPosition(position); 474 } 475 void setHasPlusPrefix(const char *position) { 476 HasPlusPrefix.setPosition(position); 477 } 478 void setHasSpacePrefix(const char *position) { 479 HasSpacePrefix.setPosition(position); 480 } 481 void setHasAlternativeForm(const char *position) { 482 HasAlternativeForm.setPosition(position); 483 } 484 void setHasLeadingZeros(const char *position) { 485 HasLeadingZeroes.setPosition(position); 486 } 487 void setHasObjCTechnicalTerm(const char *position) { 488 HasObjCTechnicalTerm.setPosition(position); 489 } 490 void setIsPrivate(const char *position) { IsPrivate.setPosition(position); } 491 void setIsPublic(const char *position) { IsPublic.setPosition(position); } 492 void setUsesPositionalArg() { UsesPositionalArg = true; } 493 494 // Methods for querying the format specifier. 495 496 const PrintfConversionSpecifier &getConversionSpecifier() const { 497 return cast<PrintfConversionSpecifier>(CS); 498 } 499 500 void setPrecision(const OptionalAmount &Amt) { 501 Precision = Amt; 502 Precision.setUsesDotPrefix(); 503 } 504 505 const OptionalAmount &getPrecision() const { 506 return Precision; 507 } 508 509 bool consumesDataArgument() const { 510 return getConversionSpecifier().consumesDataArgument(); 511 } 512 513 /// \brief Returns the builtin type that a data argument 514 /// paired with this format specifier should have. This method 515 /// will return null if the format specifier does not have 516 /// a matching data argument or the matching argument matches 517 /// more than one type. 518 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 519 520 const OptionalFlag &hasThousandsGrouping() const { 521 return HasThousandsGrouping; 522 } 523 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 524 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 525 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 526 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 527 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 528 const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; } 529 const OptionalFlag &isPrivate() const { return IsPrivate; } 530 const OptionalFlag &isPublic() const { return IsPublic; } 531 bool usesPositionalArg() const { return UsesPositionalArg; } 532 533 /// Changes the specifier and length according to a QualType, retaining any 534 /// flags or options. Returns true on success, or false when a conversion 535 /// was not successful. 536 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 537 bool IsObjCLiteral); 538 539 void toString(raw_ostream &os) const; 540 541 // Validation methods - to check if any element results in undefined behavior 542 bool hasValidPlusPrefix() const; 543 bool hasValidAlternativeForm() const; 544 bool hasValidLeadingZeros() const; 545 bool hasValidSpacePrefix() const; 546 bool hasValidLeftJustified() const; 547 bool hasValidThousandsGroupingPrefix() const; 548 549 bool hasValidPrecision() const; 550 bool hasValidFieldWidth() const; 551 }; 552 } // end analyze_printf namespace 553 554 //===----------------------------------------------------------------------===// 555 /// Pieces specific to fscanf format strings. 556 557 namespace analyze_scanf { 558 559 class ScanfConversionSpecifier : 560 public analyze_format_string::ConversionSpecifier { 561 public: 562 ScanfConversionSpecifier() 563 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {} 564 565 ScanfConversionSpecifier(const char *pos, Kind k) 566 : ConversionSpecifier(false, pos, k) {} 567 568 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 569 return !CS->isPrintfKind(); 570 } 571 }; 572 573 using analyze_format_string::ArgType; 574 using analyze_format_string::LengthModifier; 575 using analyze_format_string::OptionalAmount; 576 using analyze_format_string::OptionalFlag; 577 578 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 579 OptionalFlag SuppressAssignment; // '*' 580 public: 581 ScanfSpecifier() : 582 FormatSpecifier(/* isPrintf = */ false), 583 SuppressAssignment("*") {} 584 585 void setSuppressAssignment(const char *position) { 586 SuppressAssignment.setPosition(position); 587 } 588 589 const OptionalFlag &getSuppressAssignment() const { 590 return SuppressAssignment; 591 } 592 593 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 594 CS = cs; 595 } 596 597 const ScanfConversionSpecifier &getConversionSpecifier() const { 598 return cast<ScanfConversionSpecifier>(CS); 599 } 600 601 bool consumesDataArgument() const { 602 return CS.consumesDataArgument() && !SuppressAssignment; 603 } 604 605 ArgType getArgType(ASTContext &Ctx) const; 606 607 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt, 608 ASTContext &Ctx); 609 610 void toString(raw_ostream &os) const; 611 612 static ScanfSpecifier Parse(const char *beg, const char *end); 613 }; 614 615 } // end analyze_scanf namespace 616 617 //===----------------------------------------------------------------------===// 618 // Parsing and processing of format strings (both fprintf and fscanf). 619 620 namespace analyze_format_string { 621 622 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 623 624 class FormatStringHandler { 625 public: 626 FormatStringHandler() {} 627 virtual ~FormatStringHandler(); 628 629 virtual void HandleNullChar(const char *nullCharacter) {} 630 631 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 632 633 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 634 PositionContext p) {} 635 636 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 637 638 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 639 unsigned specifierLen) {} 640 641 virtual void HandleEmptyObjCModifierFlag(const char *startFlags, 642 unsigned flagsLen) {} 643 644 virtual void HandleInvalidObjCModifierFlag(const char *startFlag, 645 unsigned flagLen) {} 646 647 virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart, 648 const char *flagsEnd, 649 const char *conversionPosition) {} 650 // Printf-specific handlers. 651 652 virtual bool HandleInvalidPrintfConversionSpecifier( 653 const analyze_printf::PrintfSpecifier &FS, 654 const char *startSpecifier, 655 unsigned specifierLen) { 656 return true; 657 } 658 659 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 660 const char *startSpecifier, 661 unsigned specifierLen) { 662 return true; 663 } 664 665 // Scanf-specific handlers. 666 667 virtual bool HandleInvalidScanfConversionSpecifier( 668 const analyze_scanf::ScanfSpecifier &FS, 669 const char *startSpecifier, 670 unsigned specifierLen) { 671 return true; 672 } 673 674 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 675 const char *startSpecifier, 676 unsigned specifierLen) { 677 return true; 678 } 679 680 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 681 }; 682 683 bool ParsePrintfString(FormatStringHandler &H, 684 const char *beg, const char *end, const LangOptions &LO, 685 const TargetInfo &Target, bool isFreeBSDKPrintf); 686 687 bool ParseFormatStringHasSArg(const char *beg, const char *end, 688 const LangOptions &LO, const TargetInfo &Target); 689 690 bool ParseScanfString(FormatStringHandler &H, 691 const char *beg, const char *end, const LangOptions &LO, 692 const TargetInfo &Target); 693 694 } // end analyze_format_string namespace 695 } // end clang namespace 696 #endif 697