1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines APIs for analyzing the format strings of printf, fscanf, 11 // and friends. 12 // 13 // The structure of format strings for fprintf are described in C99 7.19.6.1. 14 // 15 // The structure of format strings for fscanf are described in C99 7.19.6.2. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_CLANG_FORMAT_H 20 #define LLVM_CLANG_FORMAT_H 21 22 #include "clang/AST/CanonicalType.h" 23 24 namespace clang { 25 26 class TargetInfo; 27 28 //===----------------------------------------------------------------------===// 29 /// Common components of both fprintf and fscanf format strings. 30 namespace analyze_format_string { 31 32 /// Class representing optional flags with location and representation 33 /// information. 34 class OptionalFlag { 35 public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } 45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } 49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 LLVM_EXPLICIT operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57 private: 58 const char *representation; 59 const char *position; 60 bool flag; 61 }; 62 63 /// Represents the length modifier in a format string in scanf/printf. 64 class LengthModifier { 65 public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll' 72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 73 AsIntMax, // 'j' 74 AsSizeT, // 'z' 75 AsPtrDiff, // 't' 76 AsInt32, // 'I32' (MSVCRT, like __int32) 77 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL) 78 AsInt64, // 'I64' (MSVCRT, like __int64) 79 AsLongDouble, // 'L' 80 AsAllocate, // for '%as', GNU extension to C90 scanf 81 AsMAllocate, // for '%ms', GNU extension to scanf 82 AsWideChar = AsLong // for '%ls', only makes sense for printf 83 }; 84 85 LengthModifier() 86 : Position(nullptr), kind(None) {} 87 LengthModifier(const char *pos, Kind k) 88 : Position(pos), kind(k) {} 89 90 const char *getStart() const { 91 return Position; 92 } 93 94 unsigned getLength() const { 95 switch (kind) { 96 default: 97 return 1; 98 case AsLongLong: 99 case AsChar: 100 return 2; 101 case AsInt32: 102 case AsInt64: 103 return 3; 104 case None: 105 return 0; 106 } 107 } 108 109 Kind getKind() const { return kind; } 110 void setKind(Kind k) { kind = k; } 111 112 const char *toString() const; 113 114 private: 115 const char *Position; 116 Kind kind; 117 }; 118 119 class ConversionSpecifier { 120 public: 121 enum Kind { 122 InvalidSpecifier = 0, 123 // C99 conversion specifiers. 124 cArg, 125 dArg, 126 DArg, // Apple extension 127 iArg, 128 IntArgBeg = dArg, IntArgEnd = iArg, 129 130 oArg, 131 OArg, // Apple extension 132 uArg, 133 UArg, // Apple extension 134 xArg, 135 XArg, 136 UIntArgBeg = oArg, UIntArgEnd = XArg, 137 138 fArg, 139 FArg, 140 eArg, 141 EArg, 142 gArg, 143 GArg, 144 aArg, 145 AArg, 146 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 147 148 sArg, 149 pArg, 150 nArg, 151 PercentArg, 152 CArg, 153 SArg, 154 155 // ** Printf-specific ** 156 157 // Objective-C specific specifiers. 158 ObjCObjArg, // '@' 159 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 160 161 // GlibC specific specifiers. 162 PrintErrno, // 'm' 163 164 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 165 166 // ** Scanf-specific ** 167 ScanListArg, // '[' 168 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 169 }; 170 171 ConversionSpecifier(bool isPrintf = true) 172 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr), 173 kind(InvalidSpecifier) {} 174 175 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 176 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {} 177 178 const char *getStart() const { 179 return Position; 180 } 181 182 StringRef getCharacters() const { 183 return StringRef(getStart(), getLength()); 184 } 185 186 bool consumesDataArgument() const { 187 switch (kind) { 188 case PrintErrno: 189 assert(IsPrintf); 190 return false; 191 case PercentArg: 192 return false; 193 default: 194 return true; 195 } 196 } 197 198 Kind getKind() const { return kind; } 199 void setKind(Kind k) { kind = k; } 200 unsigned getLength() const { 201 return EndScanList ? EndScanList - Position : 1; 202 } 203 204 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 205 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 206 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 207 const char *toString() const; 208 209 bool isPrintfKind() const { return IsPrintf; } 210 211 Optional<ConversionSpecifier> getStandardSpecifier() const; 212 213 protected: 214 bool IsPrintf; 215 const char *Position; 216 const char *EndScanList; 217 Kind kind; 218 }; 219 220 class ArgType { 221 public: 222 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 223 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 224 private: 225 const Kind K; 226 QualType T; 227 const char *Name; 228 bool Ptr; 229 public: 230 ArgType(Kind k = UnknownTy, const char *n = nullptr) 231 : K(k), Name(n), Ptr(false) {} 232 ArgType(QualType t, const char *n = nullptr) 233 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 234 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {} 235 236 static ArgType Invalid() { return ArgType(InvalidTy); } 237 bool isValid() const { return K != InvalidTy; } 238 239 /// Create an ArgType which corresponds to the type pointer to A. 240 static ArgType PtrTo(const ArgType& A) { 241 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 242 ArgType Res = A; 243 Res.Ptr = true; 244 return Res; 245 } 246 247 bool matchesType(ASTContext &C, QualType argTy) const; 248 249 QualType getRepresentativeType(ASTContext &C) const; 250 251 std::string getRepresentativeTypeName(ASTContext &C) const; 252 }; 253 254 class OptionalAmount { 255 public: 256 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 257 258 OptionalAmount(HowSpecified howSpecified, 259 unsigned amount, 260 const char *amountStart, 261 unsigned amountLength, 262 bool usesPositionalArg) 263 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 264 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 265 266 OptionalAmount(bool valid = true) 267 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 268 UsesPositionalArg(0), UsesDotPrefix(0) {} 269 270 bool isInvalid() const { 271 return hs == Invalid; 272 } 273 274 HowSpecified getHowSpecified() const { return hs; } 275 void setHowSpecified(HowSpecified h) { hs = h; } 276 277 bool hasDataArgument() const { return hs == Arg; } 278 279 unsigned getArgIndex() const { 280 assert(hasDataArgument()); 281 return amt; 282 } 283 284 unsigned getConstantAmount() const { 285 assert(hs == Constant); 286 return amt; 287 } 288 289 const char *getStart() const { 290 // We include the . character if it is given. 291 return start - UsesDotPrefix; 292 } 293 294 unsigned getConstantLength() const { 295 assert(hs == Constant); 296 return length + UsesDotPrefix; 297 } 298 299 ArgType getArgType(ASTContext &Ctx) const; 300 301 void toString(raw_ostream &os) const; 302 303 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 304 unsigned getPositionalArgIndex() const { 305 assert(hasDataArgument()); 306 return amt + 1; 307 } 308 309 bool usesDotPrefix() const { return UsesDotPrefix; } 310 void setUsesDotPrefix() { UsesDotPrefix = true; } 311 312 private: 313 const char *start; 314 unsigned length; 315 HowSpecified hs; 316 unsigned amt; 317 bool UsesPositionalArg : 1; 318 bool UsesDotPrefix; 319 }; 320 321 322 class FormatSpecifier { 323 protected: 324 LengthModifier LM; 325 OptionalAmount FieldWidth; 326 ConversionSpecifier CS; 327 /// Positional arguments, an IEEE extension: 328 /// IEEE Std 1003.1, 2004 Edition 329 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 330 bool UsesPositionalArg; 331 unsigned argIndex; 332 public: 333 FormatSpecifier(bool isPrintf) 334 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 335 336 void setLengthModifier(LengthModifier lm) { 337 LM = lm; 338 } 339 340 void setUsesPositionalArg() { UsesPositionalArg = true; } 341 342 void setArgIndex(unsigned i) { 343 argIndex = i; 344 } 345 346 unsigned getArgIndex() const { 347 return argIndex; 348 } 349 350 unsigned getPositionalArgIndex() const { 351 return argIndex + 1; 352 } 353 354 const LengthModifier &getLengthModifier() const { 355 return LM; 356 } 357 358 const OptionalAmount &getFieldWidth() const { 359 return FieldWidth; 360 } 361 362 void setFieldWidth(const OptionalAmount &Amt) { 363 FieldWidth = Amt; 364 } 365 366 bool usesPositionalArg() const { return UsesPositionalArg; } 367 368 bool hasValidLengthModifier(const TargetInfo &Target) const; 369 370 bool hasStandardLengthModifier() const; 371 372 Optional<LengthModifier> getCorrectedLengthModifier() const; 373 374 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 375 376 bool hasStandardLengthConversionCombination() const; 377 378 /// For a TypedefType QT, if it is a named integer type such as size_t, 379 /// assign the appropriate value to LM and return true. 380 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 381 }; 382 383 } // end analyze_format_string namespace 384 385 //===----------------------------------------------------------------------===// 386 /// Pieces specific to fprintf format strings. 387 388 namespace analyze_printf { 389 390 class PrintfConversionSpecifier : 391 public analyze_format_string::ConversionSpecifier { 392 public: 393 PrintfConversionSpecifier() 394 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {} 395 396 PrintfConversionSpecifier(const char *pos, Kind k) 397 : ConversionSpecifier(true, pos, k) {} 398 399 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 400 bool isDoubleArg() const { return kind >= DoubleArgBeg && 401 kind <= DoubleArgEnd; } 402 unsigned getLength() const { 403 // Conversion specifiers currently only are represented by 404 // single characters, but we be flexible. 405 return 1; 406 } 407 408 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 409 return CS->isPrintfKind(); 410 } 411 }; 412 413 using analyze_format_string::ArgType; 414 using analyze_format_string::LengthModifier; 415 using analyze_format_string::OptionalAmount; 416 using analyze_format_string::OptionalFlag; 417 418 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 419 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 420 OptionalFlag IsLeftJustified; // '-' 421 OptionalFlag HasPlusPrefix; // '+' 422 OptionalFlag HasSpacePrefix; // ' ' 423 OptionalFlag HasAlternativeForm; // '#' 424 OptionalFlag HasLeadingZeroes; // '0' 425 OptionalAmount Precision; 426 public: 427 PrintfSpecifier() : 428 FormatSpecifier(/* isPrintf = */ true), 429 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 430 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 431 432 static PrintfSpecifier Parse(const char *beg, const char *end); 433 434 // Methods for incrementally constructing the PrintfSpecifier. 435 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 436 CS = cs; 437 } 438 void setHasThousandsGrouping(const char *position) { 439 HasThousandsGrouping = true; 440 HasThousandsGrouping.setPosition(position); 441 } 442 void setIsLeftJustified(const char *position) { 443 IsLeftJustified = true; 444 IsLeftJustified.setPosition(position); 445 } 446 void setHasPlusPrefix(const char *position) { 447 HasPlusPrefix = true; 448 HasPlusPrefix.setPosition(position); 449 } 450 void setHasSpacePrefix(const char *position) { 451 HasSpacePrefix = true; 452 HasSpacePrefix.setPosition(position); 453 } 454 void setHasAlternativeForm(const char *position) { 455 HasAlternativeForm = true; 456 HasAlternativeForm.setPosition(position); 457 } 458 void setHasLeadingZeros(const char *position) { 459 HasLeadingZeroes = true; 460 HasLeadingZeroes.setPosition(position); 461 } 462 void setUsesPositionalArg() { UsesPositionalArg = true; } 463 464 // Methods for querying the format specifier. 465 466 const PrintfConversionSpecifier &getConversionSpecifier() const { 467 return cast<PrintfConversionSpecifier>(CS); 468 } 469 470 void setPrecision(const OptionalAmount &Amt) { 471 Precision = Amt; 472 Precision.setUsesDotPrefix(); 473 } 474 475 const OptionalAmount &getPrecision() const { 476 return Precision; 477 } 478 479 bool consumesDataArgument() const { 480 return getConversionSpecifier().consumesDataArgument(); 481 } 482 483 /// \brief Returns the builtin type that a data argument 484 /// paired with this format specifier should have. This method 485 /// will return null if the format specifier does not have 486 /// a matching data argument or the matching argument matches 487 /// more than one type. 488 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 489 490 const OptionalFlag &hasThousandsGrouping() const { 491 return HasThousandsGrouping; 492 } 493 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 494 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 495 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 496 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 497 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 498 bool usesPositionalArg() const { return UsesPositionalArg; } 499 500 /// Changes the specifier and length according to a QualType, retaining any 501 /// flags or options. Returns true on success, or false when a conversion 502 /// was not successful. 503 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 504 bool IsObjCLiteral); 505 506 void toString(raw_ostream &os) const; 507 508 // Validation methods - to check if any element results in undefined behavior 509 bool hasValidPlusPrefix() const; 510 bool hasValidAlternativeForm() const; 511 bool hasValidLeadingZeros() const; 512 bool hasValidSpacePrefix() const; 513 bool hasValidLeftJustified() const; 514 bool hasValidThousandsGroupingPrefix() const; 515 516 bool hasValidPrecision() const; 517 bool hasValidFieldWidth() const; 518 }; 519 } // end analyze_printf namespace 520 521 //===----------------------------------------------------------------------===// 522 /// Pieces specific to fscanf format strings. 523 524 namespace analyze_scanf { 525 526 class ScanfConversionSpecifier : 527 public analyze_format_string::ConversionSpecifier { 528 public: 529 ScanfConversionSpecifier() 530 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {} 531 532 ScanfConversionSpecifier(const char *pos, Kind k) 533 : ConversionSpecifier(false, pos, k) {} 534 535 void setEndScanList(const char *pos) { EndScanList = pos; } 536 537 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 538 return !CS->isPrintfKind(); 539 } 540 }; 541 542 using analyze_format_string::ArgType; 543 using analyze_format_string::LengthModifier; 544 using analyze_format_string::OptionalAmount; 545 using analyze_format_string::OptionalFlag; 546 547 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 548 OptionalFlag SuppressAssignment; // '*' 549 public: 550 ScanfSpecifier() : 551 FormatSpecifier(/* isPrintf = */ false), 552 SuppressAssignment("*") {} 553 554 void setSuppressAssignment(const char *position) { 555 SuppressAssignment = true; 556 SuppressAssignment.setPosition(position); 557 } 558 559 const OptionalFlag &getSuppressAssignment() const { 560 return SuppressAssignment; 561 } 562 563 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 564 CS = cs; 565 } 566 567 const ScanfConversionSpecifier &getConversionSpecifier() const { 568 return cast<ScanfConversionSpecifier>(CS); 569 } 570 571 bool consumesDataArgument() const { 572 return CS.consumesDataArgument() && !SuppressAssignment; 573 } 574 575 ArgType getArgType(ASTContext &Ctx) const; 576 577 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt, 578 ASTContext &Ctx); 579 580 void toString(raw_ostream &os) const; 581 582 static ScanfSpecifier Parse(const char *beg, const char *end); 583 }; 584 585 } // end analyze_scanf namespace 586 587 //===----------------------------------------------------------------------===// 588 // Parsing and processing of format strings (both fprintf and fscanf). 589 590 namespace analyze_format_string { 591 592 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 593 594 class FormatStringHandler { 595 public: 596 FormatStringHandler() {} 597 virtual ~FormatStringHandler(); 598 599 virtual void HandleNullChar(const char *nullCharacter) {} 600 601 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 602 603 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 604 PositionContext p) {} 605 606 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 607 608 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 609 unsigned specifierLen) {} 610 611 // Printf-specific handlers. 612 613 virtual bool HandleInvalidPrintfConversionSpecifier( 614 const analyze_printf::PrintfSpecifier &FS, 615 const char *startSpecifier, 616 unsigned specifierLen) { 617 return true; 618 } 619 620 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 621 const char *startSpecifier, 622 unsigned specifierLen) { 623 return true; 624 } 625 626 // Scanf-specific handlers. 627 628 virtual bool HandleInvalidScanfConversionSpecifier( 629 const analyze_scanf::ScanfSpecifier &FS, 630 const char *startSpecifier, 631 unsigned specifierLen) { 632 return true; 633 } 634 635 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 636 const char *startSpecifier, 637 unsigned specifierLen) { 638 return true; 639 } 640 641 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 642 }; 643 644 bool ParsePrintfString(FormatStringHandler &H, 645 const char *beg, const char *end, const LangOptions &LO, 646 const TargetInfo &Target); 647 648 bool ParseScanfString(FormatStringHandler &H, 649 const char *beg, const char *end, const LangOptions &LO, 650 const TargetInfo &Target); 651 652 } // end analyze_format_string namespace 653 } // end clang namespace 654 #endif 655