1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines APIs for analyzing the format strings of printf, fscanf, 11 // and friends. 12 // 13 // The structure of format strings for fprintf are described in C99 7.19.6.1. 14 // 15 // The structure of format strings for fscanf are described in C99 7.19.6.2. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_CLANG_FORMAT_H 20 #define LLVM_CLANG_FORMAT_H 21 22 #include "clang/AST/CanonicalType.h" 23 24 namespace clang { 25 26 class TargetInfo; 27 28 //===----------------------------------------------------------------------===// 29 /// Common components of both fprintf and fscanf format strings. 30 namespace analyze_format_string { 31 32 /// Class representing optional flags with location and representation 33 /// information. 34 class OptionalFlag { 35 public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } 45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } 49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 LLVM_EXPLICIT operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57 private: 58 const char *representation; 59 const char *position; 60 bool flag; 61 }; 62 63 /// Represents the length modifier in a format string in scanf/printf. 64 class LengthModifier { 65 public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll' 72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 73 AsIntMax, // 'j' 74 AsSizeT, // 'z' 75 AsPtrDiff, // 't' 76 AsLongDouble, // 'L' 77 AsAllocate, // for '%as', GNU extension to C90 scanf 78 AsMAllocate, // for '%ms', GNU extension to scanf 79 AsWideChar = AsLong // for '%ls', only makes sense for printf 80 }; 81 82 LengthModifier() 83 : Position(0), kind(None) {} 84 LengthModifier(const char *pos, Kind k) 85 : Position(pos), kind(k) {} 86 87 const char *getStart() const { 88 return Position; 89 } 90 91 unsigned getLength() const { 92 switch (kind) { 93 default: 94 return 1; 95 case AsLongLong: 96 case AsChar: 97 return 2; 98 case None: 99 return 0; 100 } 101 } 102 103 Kind getKind() const { return kind; } 104 void setKind(Kind k) { kind = k; } 105 106 const char *toString() const; 107 108 private: 109 const char *Position; 110 Kind kind; 111 }; 112 113 class ConversionSpecifier { 114 public: 115 enum Kind { 116 InvalidSpecifier = 0, 117 // C99 conversion specifiers. 118 cArg, 119 dArg, 120 DArg, // Apple extension 121 iArg, 122 IntArgBeg = dArg, IntArgEnd = iArg, 123 124 oArg, 125 OArg, // Apple extension 126 uArg, 127 UArg, // Apple extension 128 xArg, 129 XArg, 130 UIntArgBeg = oArg, UIntArgEnd = XArg, 131 132 fArg, 133 FArg, 134 eArg, 135 EArg, 136 gArg, 137 GArg, 138 aArg, 139 AArg, 140 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 141 142 sArg, 143 pArg, 144 nArg, 145 PercentArg, 146 CArg, 147 SArg, 148 149 // ** Printf-specific ** 150 151 // Objective-C specific specifiers. 152 ObjCObjArg, // '@' 153 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 154 155 // GlibC specific specifiers. 156 PrintErrno, // 'm' 157 158 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 159 160 // ** Scanf-specific ** 161 ScanListArg, // '[' 162 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 163 }; 164 165 ConversionSpecifier(bool isPrintf = true) 166 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 167 168 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 169 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 170 171 const char *getStart() const { 172 return Position; 173 } 174 175 StringRef getCharacters() const { 176 return StringRef(getStart(), getLength()); 177 } 178 179 bool consumesDataArgument() const { 180 switch (kind) { 181 case PrintErrno: 182 assert(IsPrintf); 183 return false; 184 case PercentArg: 185 return false; 186 default: 187 return true; 188 } 189 } 190 191 Kind getKind() const { return kind; } 192 void setKind(Kind k) { kind = k; } 193 unsigned getLength() const { 194 return EndScanList ? EndScanList - Position : 1; 195 } 196 197 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 198 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 199 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 200 const char *toString() const; 201 202 bool isPrintfKind() const { return IsPrintf; } 203 204 Optional<ConversionSpecifier> getStandardSpecifier() const; 205 206 protected: 207 bool IsPrintf; 208 const char *Position; 209 const char *EndScanList; 210 Kind kind; 211 }; 212 213 class ArgType { 214 public: 215 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 216 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 217 private: 218 const Kind K; 219 QualType T; 220 const char *Name; 221 bool Ptr; 222 public: 223 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {} 224 ArgType(QualType t, const char *n = 0) 225 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 226 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {} 227 228 static ArgType Invalid() { return ArgType(InvalidTy); } 229 bool isValid() const { return K != InvalidTy; } 230 231 /// Create an ArgType which corresponds to the type pointer to A. 232 static ArgType PtrTo(const ArgType& A) { 233 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 234 ArgType Res = A; 235 Res.Ptr = true; 236 return Res; 237 } 238 239 bool matchesType(ASTContext &C, QualType argTy) const; 240 241 QualType getRepresentativeType(ASTContext &C) const; 242 243 std::string getRepresentativeTypeName(ASTContext &C) const; 244 }; 245 246 class OptionalAmount { 247 public: 248 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 249 250 OptionalAmount(HowSpecified howSpecified, 251 unsigned amount, 252 const char *amountStart, 253 unsigned amountLength, 254 bool usesPositionalArg) 255 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 256 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 257 258 OptionalAmount(bool valid = true) 259 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 260 UsesPositionalArg(0), UsesDotPrefix(0) {} 261 262 bool isInvalid() const { 263 return hs == Invalid; 264 } 265 266 HowSpecified getHowSpecified() const { return hs; } 267 void setHowSpecified(HowSpecified h) { hs = h; } 268 269 bool hasDataArgument() const { return hs == Arg; } 270 271 unsigned getArgIndex() const { 272 assert(hasDataArgument()); 273 return amt; 274 } 275 276 unsigned getConstantAmount() const { 277 assert(hs == Constant); 278 return amt; 279 } 280 281 const char *getStart() const { 282 // We include the . character if it is given. 283 return start - UsesDotPrefix; 284 } 285 286 unsigned getConstantLength() const { 287 assert(hs == Constant); 288 return length + UsesDotPrefix; 289 } 290 291 ArgType getArgType(ASTContext &Ctx) const; 292 293 void toString(raw_ostream &os) const; 294 295 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 296 unsigned getPositionalArgIndex() const { 297 assert(hasDataArgument()); 298 return amt + 1; 299 } 300 301 bool usesDotPrefix() const { return UsesDotPrefix; } 302 void setUsesDotPrefix() { UsesDotPrefix = true; } 303 304 private: 305 const char *start; 306 unsigned length; 307 HowSpecified hs; 308 unsigned amt; 309 bool UsesPositionalArg : 1; 310 bool UsesDotPrefix; 311 }; 312 313 314 class FormatSpecifier { 315 protected: 316 LengthModifier LM; 317 OptionalAmount FieldWidth; 318 ConversionSpecifier CS; 319 /// Positional arguments, an IEEE extension: 320 /// IEEE Std 1003.1, 2004 Edition 321 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 322 bool UsesPositionalArg; 323 unsigned argIndex; 324 public: 325 FormatSpecifier(bool isPrintf) 326 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 327 328 void setLengthModifier(LengthModifier lm) { 329 LM = lm; 330 } 331 332 void setUsesPositionalArg() { UsesPositionalArg = true; } 333 334 void setArgIndex(unsigned i) { 335 argIndex = i; 336 } 337 338 unsigned getArgIndex() const { 339 return argIndex; 340 } 341 342 unsigned getPositionalArgIndex() const { 343 return argIndex + 1; 344 } 345 346 const LengthModifier &getLengthModifier() const { 347 return LM; 348 } 349 350 const OptionalAmount &getFieldWidth() const { 351 return FieldWidth; 352 } 353 354 void setFieldWidth(const OptionalAmount &Amt) { 355 FieldWidth = Amt; 356 } 357 358 bool usesPositionalArg() const { return UsesPositionalArg; } 359 360 bool hasValidLengthModifier(const TargetInfo &Target) const; 361 362 bool hasStandardLengthModifier() const; 363 364 Optional<LengthModifier> getCorrectedLengthModifier() const; 365 366 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 367 368 bool hasStandardLengthConversionCombination() const; 369 370 /// For a TypedefType QT, if it is a named integer type such as size_t, 371 /// assign the appropriate value to LM and return true. 372 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 373 }; 374 375 } // end analyze_format_string namespace 376 377 //===----------------------------------------------------------------------===// 378 /// Pieces specific to fprintf format strings. 379 380 namespace analyze_printf { 381 382 class PrintfConversionSpecifier : 383 public analyze_format_string::ConversionSpecifier { 384 public: 385 PrintfConversionSpecifier() 386 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 387 388 PrintfConversionSpecifier(const char *pos, Kind k) 389 : ConversionSpecifier(true, pos, k) {} 390 391 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 392 bool isDoubleArg() const { return kind >= DoubleArgBeg && 393 kind <= DoubleArgEnd; } 394 unsigned getLength() const { 395 // Conversion specifiers currently only are represented by 396 // single characters, but we be flexible. 397 return 1; 398 } 399 400 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 401 return CS->isPrintfKind(); 402 } 403 }; 404 405 using analyze_format_string::ArgType; 406 using analyze_format_string::LengthModifier; 407 using analyze_format_string::OptionalAmount; 408 using analyze_format_string::OptionalFlag; 409 410 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 411 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 412 OptionalFlag IsLeftJustified; // '-' 413 OptionalFlag HasPlusPrefix; // '+' 414 OptionalFlag HasSpacePrefix; // ' ' 415 OptionalFlag HasAlternativeForm; // '#' 416 OptionalFlag HasLeadingZeroes; // '0' 417 OptionalAmount Precision; 418 public: 419 PrintfSpecifier() : 420 FormatSpecifier(/* isPrintf = */ true), 421 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 422 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 423 424 static PrintfSpecifier Parse(const char *beg, const char *end); 425 426 // Methods for incrementally constructing the PrintfSpecifier. 427 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 428 CS = cs; 429 } 430 void setHasThousandsGrouping(const char *position) { 431 HasThousandsGrouping = true; 432 HasThousandsGrouping.setPosition(position); 433 } 434 void setIsLeftJustified(const char *position) { 435 IsLeftJustified = true; 436 IsLeftJustified.setPosition(position); 437 } 438 void setHasPlusPrefix(const char *position) { 439 HasPlusPrefix = true; 440 HasPlusPrefix.setPosition(position); 441 } 442 void setHasSpacePrefix(const char *position) { 443 HasSpacePrefix = true; 444 HasSpacePrefix.setPosition(position); 445 } 446 void setHasAlternativeForm(const char *position) { 447 HasAlternativeForm = true; 448 HasAlternativeForm.setPosition(position); 449 } 450 void setHasLeadingZeros(const char *position) { 451 HasLeadingZeroes = true; 452 HasLeadingZeroes.setPosition(position); 453 } 454 void setUsesPositionalArg() { UsesPositionalArg = true; } 455 456 // Methods for querying the format specifier. 457 458 const PrintfConversionSpecifier &getConversionSpecifier() const { 459 return cast<PrintfConversionSpecifier>(CS); 460 } 461 462 void setPrecision(const OptionalAmount &Amt) { 463 Precision = Amt; 464 Precision.setUsesDotPrefix(); 465 } 466 467 const OptionalAmount &getPrecision() const { 468 return Precision; 469 } 470 471 bool consumesDataArgument() const { 472 return getConversionSpecifier().consumesDataArgument(); 473 } 474 475 /// \brief Returns the builtin type that a data argument 476 /// paired with this format specifier should have. This method 477 /// will return null if the format specifier does not have 478 /// a matching data argument or the matching argument matches 479 /// more than one type. 480 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 481 482 const OptionalFlag &hasThousandsGrouping() const { 483 return HasThousandsGrouping; 484 } 485 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 486 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 487 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 488 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 489 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 490 bool usesPositionalArg() const { return UsesPositionalArg; } 491 492 /// Changes the specifier and length according to a QualType, retaining any 493 /// flags or options. Returns true on success, or false when a conversion 494 /// was not successful. 495 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 496 bool IsObjCLiteral); 497 498 void toString(raw_ostream &os) const; 499 500 // Validation methods - to check if any element results in undefined behavior 501 bool hasValidPlusPrefix() const; 502 bool hasValidAlternativeForm() const; 503 bool hasValidLeadingZeros() const; 504 bool hasValidSpacePrefix() const; 505 bool hasValidLeftJustified() const; 506 bool hasValidThousandsGroupingPrefix() const; 507 508 bool hasValidPrecision() const; 509 bool hasValidFieldWidth() const; 510 }; 511 } // end analyze_printf namespace 512 513 //===----------------------------------------------------------------------===// 514 /// Pieces specific to fscanf format strings. 515 516 namespace analyze_scanf { 517 518 class ScanfConversionSpecifier : 519 public analyze_format_string::ConversionSpecifier { 520 public: 521 ScanfConversionSpecifier() 522 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 523 524 ScanfConversionSpecifier(const char *pos, Kind k) 525 : ConversionSpecifier(false, pos, k) {} 526 527 void setEndScanList(const char *pos) { EndScanList = pos; } 528 529 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 530 return !CS->isPrintfKind(); 531 } 532 }; 533 534 using analyze_format_string::ArgType; 535 using analyze_format_string::LengthModifier; 536 using analyze_format_string::OptionalAmount; 537 using analyze_format_string::OptionalFlag; 538 539 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 540 OptionalFlag SuppressAssignment; // '*' 541 public: 542 ScanfSpecifier() : 543 FormatSpecifier(/* isPrintf = */ false), 544 SuppressAssignment("*") {} 545 546 void setSuppressAssignment(const char *position) { 547 SuppressAssignment = true; 548 SuppressAssignment.setPosition(position); 549 } 550 551 const OptionalFlag &getSuppressAssignment() const { 552 return SuppressAssignment; 553 } 554 555 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 556 CS = cs; 557 } 558 559 const ScanfConversionSpecifier &getConversionSpecifier() const { 560 return cast<ScanfConversionSpecifier>(CS); 561 } 562 563 bool consumesDataArgument() const { 564 return CS.consumesDataArgument() && !SuppressAssignment; 565 } 566 567 ArgType getArgType(ASTContext &Ctx) const; 568 569 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 570 571 void toString(raw_ostream &os) const; 572 573 static ScanfSpecifier Parse(const char *beg, const char *end); 574 }; 575 576 } // end analyze_scanf namespace 577 578 //===----------------------------------------------------------------------===// 579 // Parsing and processing of format strings (both fprintf and fscanf). 580 581 namespace analyze_format_string { 582 583 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 584 585 class FormatStringHandler { 586 public: 587 FormatStringHandler() {} 588 virtual ~FormatStringHandler(); 589 590 virtual void HandleNullChar(const char *nullCharacter) {} 591 592 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 593 594 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 595 PositionContext p) {} 596 597 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 598 599 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 600 unsigned specifierLen) {} 601 602 // Printf-specific handlers. 603 604 virtual bool HandleInvalidPrintfConversionSpecifier( 605 const analyze_printf::PrintfSpecifier &FS, 606 const char *startSpecifier, 607 unsigned specifierLen) { 608 return true; 609 } 610 611 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 612 const char *startSpecifier, 613 unsigned specifierLen) { 614 return true; 615 } 616 617 // Scanf-specific handlers. 618 619 virtual bool HandleInvalidScanfConversionSpecifier( 620 const analyze_scanf::ScanfSpecifier &FS, 621 const char *startSpecifier, 622 unsigned specifierLen) { 623 return true; 624 } 625 626 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 627 const char *startSpecifier, 628 unsigned specifierLen) { 629 return true; 630 } 631 632 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 633 }; 634 635 bool ParsePrintfString(FormatStringHandler &H, 636 const char *beg, const char *end, const LangOptions &LO, 637 const TargetInfo &Target); 638 639 bool ParseScanfString(FormatStringHandler &H, 640 const char *beg, const char *end, const LangOptions &LO, 641 const TargetInfo &Target); 642 643 } // end analyze_format_string namespace 644 } // end clang namespace 645 #endif 646