1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines APIs for analyzing the format strings of printf, fscanf, 11 // and friends. 12 // 13 // The structure of format strings for fprintf are described in C99 7.19.6.1. 14 // 15 // The structure of format strings for fscanf are described in C99 7.19.6.2. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_CLANG_FORMAT_H 20 #define LLVM_CLANG_FORMAT_H 21 22 #include "clang/AST/CanonicalType.h" 23 24 namespace clang { 25 26 //===----------------------------------------------------------------------===// 27 /// Common components of both fprintf and fscanf format strings. 28 namespace analyze_format_string { 29 30 /// Class representing optional flags with location and representation 31 /// information. 32 class OptionalFlag { 33 public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55 private: 56 const char *representation; 57 const char *position; 58 bool flag; 59 }; 60 61 /// Represents the length modifier in a format string in scanf/printf. 62 class LengthModifier { 63 public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll' 70 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 71 AsIntMax, // 'j' 72 AsSizeT, // 'z' 73 AsPtrDiff, // 't' 74 AsLongDouble, // 'L' 75 AsAllocate, // for '%as', GNU extension to C90 scanf 76 AsMAllocate, // for '%ms', GNU extension to scanf 77 AsWideChar = AsLong // for '%ls', only makes sense for printf 78 }; 79 80 LengthModifier() 81 : Position(0), kind(None) {} 82 LengthModifier(const char *pos, Kind k) 83 : Position(pos), kind(k) {} 84 85 const char *getStart() const { 86 return Position; 87 } 88 89 unsigned getLength() const { 90 switch (kind) { 91 default: 92 return 1; 93 case AsLongLong: 94 case AsChar: 95 return 2; 96 case None: 97 return 0; 98 } 99 } 100 101 Kind getKind() const { return kind; } 102 void setKind(Kind k) { kind = k; } 103 104 const char *toString() const; 105 106 private: 107 const char *Position; 108 Kind kind; 109 }; 110 111 class ConversionSpecifier { 112 public: 113 enum Kind { 114 InvalidSpecifier = 0, 115 // C99 conversion specifiers. 116 cArg, 117 dArg, 118 iArg, 119 IntArgBeg = cArg, IntArgEnd = iArg, 120 121 oArg, 122 uArg, 123 xArg, 124 XArg, 125 UIntArgBeg = oArg, UIntArgEnd = XArg, 126 127 fArg, 128 FArg, 129 eArg, 130 EArg, 131 gArg, 132 GArg, 133 aArg, 134 AArg, 135 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 136 137 sArg, 138 pArg, 139 nArg, 140 PercentArg, 141 CArg, 142 SArg, 143 144 // ** Printf-specific ** 145 146 // Objective-C specific specifiers. 147 ObjCObjArg, // '@' 148 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 149 150 // GlibC specific specifiers. 151 PrintErrno, // 'm' 152 153 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 154 155 // ** Scanf-specific ** 156 ScanListArg, // '[' 157 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 158 }; 159 160 ConversionSpecifier(bool isPrintf) 161 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 162 163 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 164 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 165 166 const char *getStart() const { 167 return Position; 168 } 169 170 StringRef getCharacters() const { 171 return StringRef(getStart(), getLength()); 172 } 173 174 bool consumesDataArgument() const { 175 switch (kind) { 176 case PrintErrno: 177 assert(IsPrintf); 178 case PercentArg: 179 return false; 180 default: 181 return true; 182 } 183 } 184 185 Kind getKind() const { return kind; } 186 void setKind(Kind k) { kind = k; } 187 unsigned getLength() const { 188 return EndScanList ? EndScanList - Position : 1; 189 } 190 191 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 192 const char *toString() const; 193 194 bool isPrintfKind() const { return IsPrintf; } 195 196 protected: 197 bool IsPrintf; 198 const char *Position; 199 const char *EndScanList; 200 Kind kind; 201 }; 202 203 class ArgTypeResult { 204 public: 205 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 206 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 207 private: 208 const Kind K; 209 QualType T; 210 const char *Name; 211 ArgTypeResult(bool) : K(InvalidTy), Name(0) {} 212 public: 213 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {} 214 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {} 215 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {} 216 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {} 217 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 218 219 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 220 221 bool isValid() const { return K != InvalidTy; } 222 223 const QualType *getSpecificType() const { 224 return K == SpecificTy ? &T : 0; 225 } 226 227 bool matchesType(ASTContext &C, QualType argTy) const; 228 229 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 230 231 QualType getRepresentativeType(ASTContext &C) const; 232 233 std::string getRepresentativeTypeName(ASTContext &C) const; 234 }; 235 236 class OptionalAmount { 237 public: 238 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 239 240 OptionalAmount(HowSpecified howSpecified, 241 unsigned amount, 242 const char *amountStart, 243 unsigned amountLength, 244 bool usesPositionalArg) 245 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 246 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 247 248 OptionalAmount(bool valid = true) 249 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 250 UsesPositionalArg(0), UsesDotPrefix(0) {} 251 252 bool isInvalid() const { 253 return hs == Invalid; 254 } 255 256 HowSpecified getHowSpecified() const { return hs; } 257 void setHowSpecified(HowSpecified h) { hs = h; } 258 259 bool hasDataArgument() const { return hs == Arg; } 260 261 unsigned getArgIndex() const { 262 assert(hasDataArgument()); 263 return amt; 264 } 265 266 unsigned getConstantAmount() const { 267 assert(hs == Constant); 268 return amt; 269 } 270 271 const char *getStart() const { 272 // We include the . character if it is given. 273 return start - UsesDotPrefix; 274 } 275 276 unsigned getConstantLength() const { 277 assert(hs == Constant); 278 return length + UsesDotPrefix; 279 } 280 281 ArgTypeResult getArgType(ASTContext &Ctx) const; 282 283 void toString(raw_ostream &os) const; 284 285 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 286 unsigned getPositionalArgIndex() const { 287 assert(hasDataArgument()); 288 return amt + 1; 289 } 290 291 bool usesDotPrefix() const { return UsesDotPrefix; } 292 void setUsesDotPrefix() { UsesDotPrefix = true; } 293 294 private: 295 const char *start; 296 unsigned length; 297 HowSpecified hs; 298 unsigned amt; 299 bool UsesPositionalArg : 1; 300 bool UsesDotPrefix; 301 }; 302 303 304 class FormatSpecifier { 305 protected: 306 LengthModifier LM; 307 OptionalAmount FieldWidth; 308 ConversionSpecifier CS; 309 /// Positional arguments, an IEEE extension: 310 /// IEEE Std 1003.1, 2004 Edition 311 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 312 bool UsesPositionalArg; 313 unsigned argIndex; 314 public: 315 FormatSpecifier(bool isPrintf) 316 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 317 318 void setLengthModifier(LengthModifier lm) { 319 LM = lm; 320 } 321 322 void setUsesPositionalArg() { UsesPositionalArg = true; } 323 324 void setArgIndex(unsigned i) { 325 argIndex = i; 326 } 327 328 unsigned getArgIndex() const { 329 return argIndex; 330 } 331 332 unsigned getPositionalArgIndex() const { 333 return argIndex + 1; 334 } 335 336 const LengthModifier &getLengthModifier() const { 337 return LM; 338 } 339 340 const OptionalAmount &getFieldWidth() const { 341 return FieldWidth; 342 } 343 344 void setFieldWidth(const OptionalAmount &Amt) { 345 FieldWidth = Amt; 346 } 347 348 bool usesPositionalArg() const { return UsesPositionalArg; } 349 350 bool hasValidLengthModifier() const; 351 352 bool hasStandardLengthModifier() const; 353 354 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 355 356 bool hasStandardLengthConversionCombination() const; 357 }; 358 359 } // end analyze_format_string namespace 360 361 //===----------------------------------------------------------------------===// 362 /// Pieces specific to fprintf format strings. 363 364 namespace analyze_printf { 365 366 class PrintfConversionSpecifier : 367 public analyze_format_string::ConversionSpecifier { 368 public: 369 PrintfConversionSpecifier() 370 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 371 372 PrintfConversionSpecifier(const char *pos, Kind k) 373 : ConversionSpecifier(true, pos, k) {} 374 375 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 376 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 377 bool isDoubleArg() const { return kind >= DoubleArgBeg && 378 kind <= DoubleArgEnd; } 379 unsigned getLength() const { 380 // Conversion specifiers currently only are represented by 381 // single characters, but we be flexible. 382 return 1; 383 } 384 385 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 386 return CS->isPrintfKind(); 387 } 388 }; 389 390 using analyze_format_string::ArgTypeResult; 391 using analyze_format_string::LengthModifier; 392 using analyze_format_string::OptionalAmount; 393 using analyze_format_string::OptionalFlag; 394 395 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 396 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 397 OptionalFlag IsLeftJustified; // '-' 398 OptionalFlag HasPlusPrefix; // '+' 399 OptionalFlag HasSpacePrefix; // ' ' 400 OptionalFlag HasAlternativeForm; // '#' 401 OptionalFlag HasLeadingZeroes; // '0' 402 OptionalAmount Precision; 403 public: 404 PrintfSpecifier() : 405 FormatSpecifier(/* isPrintf = */ true), 406 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 407 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 408 409 static PrintfSpecifier Parse(const char *beg, const char *end); 410 411 // Methods for incrementally constructing the PrintfSpecifier. 412 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 413 CS = cs; 414 } 415 void setHasThousandsGrouping(const char *position) { 416 HasThousandsGrouping = true; 417 HasThousandsGrouping.setPosition(position); 418 } 419 void setIsLeftJustified(const char *position) { 420 IsLeftJustified = true; 421 IsLeftJustified.setPosition(position); 422 } 423 void setHasPlusPrefix(const char *position) { 424 HasPlusPrefix = true; 425 HasPlusPrefix.setPosition(position); 426 } 427 void setHasSpacePrefix(const char *position) { 428 HasSpacePrefix = true; 429 HasSpacePrefix.setPosition(position); 430 } 431 void setHasAlternativeForm(const char *position) { 432 HasAlternativeForm = true; 433 HasAlternativeForm.setPosition(position); 434 } 435 void setHasLeadingZeros(const char *position) { 436 HasLeadingZeroes = true; 437 HasLeadingZeroes.setPosition(position); 438 } 439 void setUsesPositionalArg() { UsesPositionalArg = true; } 440 441 // Methods for querying the format specifier. 442 443 const PrintfConversionSpecifier &getConversionSpecifier() const { 444 return cast<PrintfConversionSpecifier>(CS); 445 } 446 447 void setPrecision(const OptionalAmount &Amt) { 448 Precision = Amt; 449 Precision.setUsesDotPrefix(); 450 } 451 452 const OptionalAmount &getPrecision() const { 453 return Precision; 454 } 455 456 bool consumesDataArgument() const { 457 return getConversionSpecifier().consumesDataArgument(); 458 } 459 460 /// \brief Returns the builtin type that a data argument 461 /// paired with this format specifier should have. This method 462 /// will return null if the format specifier does not have 463 /// a matching data argument or the matching argument matches 464 /// more than one type. 465 ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 466 467 const OptionalFlag &hasThousandsGrouping() const { 468 return HasThousandsGrouping; 469 } 470 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 471 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 472 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 473 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 474 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 475 bool usesPositionalArg() const { return UsesPositionalArg; } 476 477 /// Changes the specifier and length according to a QualType, retaining any 478 /// flags or options. Returns true on success, or false when a conversion 479 /// was not successful. 480 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 481 bool IsObjCLiteral); 482 483 void toString(raw_ostream &os) const; 484 485 // Validation methods - to check if any element results in undefined behavior 486 bool hasValidPlusPrefix() const; 487 bool hasValidAlternativeForm() const; 488 bool hasValidLeadingZeros() const; 489 bool hasValidSpacePrefix() const; 490 bool hasValidLeftJustified() const; 491 bool hasValidThousandsGroupingPrefix() const; 492 493 bool hasValidPrecision() const; 494 bool hasValidFieldWidth() const; 495 }; 496 } // end analyze_printf namespace 497 498 //===----------------------------------------------------------------------===// 499 /// Pieces specific to fscanf format strings. 500 501 namespace analyze_scanf { 502 503 class ScanfConversionSpecifier : 504 public analyze_format_string::ConversionSpecifier { 505 public: 506 ScanfConversionSpecifier() 507 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 508 509 ScanfConversionSpecifier(const char *pos, Kind k) 510 : ConversionSpecifier(false, pos, k) {} 511 512 void setEndScanList(const char *pos) { EndScanList = pos; } 513 514 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 515 return !CS->isPrintfKind(); 516 } 517 }; 518 519 using analyze_format_string::ArgTypeResult; 520 using analyze_format_string::LengthModifier; 521 using analyze_format_string::OptionalAmount; 522 using analyze_format_string::OptionalFlag; 523 524 class ScanfArgTypeResult : public ArgTypeResult { 525 public: 526 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; 527 private: 528 Kind K; 529 ArgTypeResult A; 530 const char *Name; 531 QualType getRepresentativeType(ASTContext &C) const; 532 public: 533 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 534 ScanfArgTypeResult(ArgTypeResult a, const char *n = 0) 535 : K(PtrToArgTypeResultTy), A(a), Name(n) { 536 assert(A.isValid()); 537 } 538 539 static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); } 540 541 bool isValid() const { return K != InvalidTy; } 542 543 bool matchesType(ASTContext& C, QualType argTy) const; 544 545 std::string getRepresentativeTypeName(ASTContext& C) const; 546 }; 547 548 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 549 OptionalFlag SuppressAssignment; // '*' 550 public: 551 ScanfSpecifier() : 552 FormatSpecifier(/* isPrintf = */ false), 553 SuppressAssignment("*") {} 554 555 void setSuppressAssignment(const char *position) { 556 SuppressAssignment = true; 557 SuppressAssignment.setPosition(position); 558 } 559 560 const OptionalFlag &getSuppressAssignment() const { 561 return SuppressAssignment; 562 } 563 564 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 565 CS = cs; 566 } 567 568 const ScanfConversionSpecifier &getConversionSpecifier() const { 569 return cast<ScanfConversionSpecifier>(CS); 570 } 571 572 bool consumesDataArgument() const { 573 return CS.consumesDataArgument() && !SuppressAssignment; 574 } 575 576 ScanfArgTypeResult getArgType(ASTContext &Ctx) const; 577 578 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 579 580 void toString(raw_ostream &os) const; 581 582 static ScanfSpecifier Parse(const char *beg, const char *end); 583 }; 584 585 } // end analyze_scanf namespace 586 587 //===----------------------------------------------------------------------===// 588 // Parsing and processing of format strings (both fprintf and fscanf). 589 590 namespace analyze_format_string { 591 592 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 593 594 class FormatStringHandler { 595 public: 596 FormatStringHandler() {} 597 virtual ~FormatStringHandler(); 598 599 virtual void HandleNullChar(const char *nullCharacter) {} 600 601 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 602 603 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 604 PositionContext p) {} 605 606 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 607 608 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 609 unsigned specifierLen) {} 610 611 // Printf-specific handlers. 612 613 virtual bool HandleInvalidPrintfConversionSpecifier( 614 const analyze_printf::PrintfSpecifier &FS, 615 const char *startSpecifier, 616 unsigned specifierLen) { 617 return true; 618 } 619 620 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 621 const char *startSpecifier, 622 unsigned specifierLen) { 623 return true; 624 } 625 626 // Scanf-specific handlers. 627 628 virtual bool HandleInvalidScanfConversionSpecifier( 629 const analyze_scanf::ScanfSpecifier &FS, 630 const char *startSpecifier, 631 unsigned specifierLen) { 632 return true; 633 } 634 635 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 636 const char *startSpecifier, 637 unsigned specifierLen) { 638 return true; 639 } 640 641 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 642 }; 643 644 bool ParsePrintfString(FormatStringHandler &H, 645 const char *beg, const char *end, const LangOptions &LO); 646 647 bool ParseScanfString(FormatStringHandler &H, 648 const char *beg, const char *end, const LangOptions &LO); 649 650 } // end analyze_format_string namespace 651 } // end clang namespace 652 #endif 653