1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines APIs for analyzing the format strings of printf, fscanf, 11 // and friends. 12 // 13 // The structure of format strings for fprintf are described in C99 7.19.6.1. 14 // 15 // The structure of format strings for fscanf are described in C99 7.19.6.2. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_CLANG_FORMAT_H 20 #define LLVM_CLANG_FORMAT_H 21 22 #include "clang/AST/CanonicalType.h" 23 24 namespace clang { 25 26 //===----------------------------------------------------------------------===// 27 /// Common components of both fprintf and fscanf format strings. 28 namespace analyze_format_string { 29 30 /// Class representing optional flags with location and representation 31 /// information. 32 class OptionalFlag { 33 public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55 private: 56 const char *representation; 57 const char *position; 58 bool flag; 59 }; 60 61 /// Represents the length modifier in a format string in scanf/printf. 62 class LengthModifier { 63 public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsWideChar = AsLong // for '%ls', only makes sense for printf 75 }; 76 77 LengthModifier() 78 : Position(0), kind(None) {} 79 LengthModifier(const char *pos, Kind k) 80 : Position(pos), kind(k) {} 81 82 const char *getStart() const { 83 return Position; 84 } 85 86 unsigned getLength() const { 87 switch (kind) { 88 default: 89 return 1; 90 case AsLongLong: 91 case AsChar: 92 return 2; 93 case None: 94 return 0; 95 } 96 } 97 98 Kind getKind() const { return kind; } 99 void setKind(Kind k) { kind = k; } 100 101 const char *toString() const; 102 103 private: 104 const char *Position; 105 Kind kind; 106 }; 107 108 class ConversionSpecifier { 109 public: 110 enum Kind { 111 InvalidSpecifier = 0, 112 // C99 conversion specifiers. 113 cArg, 114 dArg, 115 iArg, 116 IntArgBeg = cArg, IntArgEnd = iArg, 117 118 oArg, 119 uArg, 120 xArg, 121 XArg, 122 UIntArgBeg = oArg, UIntArgEnd = XArg, 123 124 fArg, 125 FArg, 126 eArg, 127 EArg, 128 gArg, 129 GArg, 130 aArg, 131 AArg, 132 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 133 134 sArg, 135 pArg, 136 nArg, 137 PercentArg, 138 CArg, 139 SArg, 140 141 // ** Printf-specific ** 142 143 // Objective-C specific specifiers. 144 ObjCObjArg, // '@' 145 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 146 147 // GlibC specific specifiers. 148 PrintErrno, // 'm' 149 150 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 151 152 // ** Scanf-specific ** 153 ScanListArg, // '[' 154 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 155 }; 156 157 ConversionSpecifier(bool isPrintf) 158 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 159 160 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 161 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 162 163 const char *getStart() const { 164 return Position; 165 } 166 167 llvm::StringRef getCharacters() const { 168 return llvm::StringRef(getStart(), getLength()); 169 } 170 171 bool consumesDataArgument() const { 172 switch (kind) { 173 case PrintErrno: 174 assert(IsPrintf); 175 case PercentArg: 176 return false; 177 default: 178 return true; 179 } 180 } 181 182 Kind getKind() const { return kind; } 183 void setKind(Kind k) { kind = k; } 184 unsigned getLength() const { 185 return EndScanList ? EndScanList - Position : 1; 186 } 187 188 const char *toString() const; 189 190 bool isPrintfKind() const { return IsPrintf; } 191 192 protected: 193 bool IsPrintf; 194 const char *Position; 195 const char *EndScanList; 196 Kind kind; 197 }; 198 199 class ArgTypeResult { 200 public: 201 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 202 CStrTy, WCStrTy, WIntTy }; 203 private: 204 const Kind K; 205 QualType T; 206 ArgTypeResult(bool) : K(InvalidTy) {} 207 public: 208 ArgTypeResult(Kind k = UnknownTy) : K(k) {} 209 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} 210 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} 211 212 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 213 214 bool isValid() const { return K != InvalidTy; } 215 216 const QualType *getSpecificType() const { 217 return K == SpecificTy ? &T : 0; 218 } 219 220 bool matchesType(ASTContext &C, QualType argTy) const; 221 222 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 223 224 QualType getRepresentativeType(ASTContext &C) const; 225 }; 226 227 class OptionalAmount { 228 public: 229 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 230 231 OptionalAmount(HowSpecified howSpecified, 232 unsigned amount, 233 const char *amountStart, 234 unsigned amountLength, 235 bool usesPositionalArg) 236 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 237 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 238 239 OptionalAmount(bool valid = true) 240 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 241 UsesPositionalArg(0), UsesDotPrefix(0) {} 242 243 bool isInvalid() const { 244 return hs == Invalid; 245 } 246 247 HowSpecified getHowSpecified() const { return hs; } 248 void setHowSpecified(HowSpecified h) { hs = h; } 249 250 bool hasDataArgument() const { return hs == Arg; } 251 252 unsigned getArgIndex() const { 253 assert(hasDataArgument()); 254 return amt; 255 } 256 257 unsigned getConstantAmount() const { 258 assert(hs == Constant); 259 return amt; 260 } 261 262 const char *getStart() const { 263 // We include the . character if it is given. 264 return start - UsesDotPrefix; 265 } 266 267 unsigned getConstantLength() const { 268 assert(hs == Constant); 269 return length + UsesDotPrefix; 270 } 271 272 ArgTypeResult getArgType(ASTContext &Ctx) const; 273 274 void toString(llvm::raw_ostream &os) const; 275 276 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 277 unsigned getPositionalArgIndex() const { 278 assert(hasDataArgument()); 279 return amt + 1; 280 } 281 282 bool usesDotPrefix() const { return UsesDotPrefix; } 283 void setUsesDotPrefix() { UsesDotPrefix = true; } 284 285 private: 286 const char *start; 287 unsigned length; 288 HowSpecified hs; 289 unsigned amt; 290 bool UsesPositionalArg : 1; 291 bool UsesDotPrefix; 292 }; 293 294 295 class FormatSpecifier { 296 protected: 297 LengthModifier LM; 298 OptionalAmount FieldWidth; 299 ConversionSpecifier CS; 300 /// Positional arguments, an IEEE extension: 301 /// IEEE Std 1003.1, 2004 Edition 302 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 303 bool UsesPositionalArg; 304 unsigned argIndex; 305 public: 306 FormatSpecifier(bool isPrintf) 307 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 308 309 void setLengthModifier(LengthModifier lm) { 310 LM = lm; 311 } 312 313 void setUsesPositionalArg() { UsesPositionalArg = true; } 314 315 void setArgIndex(unsigned i) { 316 argIndex = i; 317 } 318 319 unsigned getArgIndex() const { 320 return argIndex; 321 } 322 323 unsigned getPositionalArgIndex() const { 324 return argIndex + 1; 325 } 326 327 const LengthModifier &getLengthModifier() const { 328 return LM; 329 } 330 331 const OptionalAmount &getFieldWidth() const { 332 return FieldWidth; 333 } 334 335 void setFieldWidth(const OptionalAmount &Amt) { 336 FieldWidth = Amt; 337 } 338 339 bool usesPositionalArg() const { return UsesPositionalArg; } 340 341 bool hasValidLengthModifier() const; 342 }; 343 344 } // end analyze_format_string namespace 345 346 //===----------------------------------------------------------------------===// 347 /// Pieces specific to fprintf format strings. 348 349 namespace analyze_printf { 350 351 class PrintfConversionSpecifier : 352 public analyze_format_string::ConversionSpecifier { 353 public: 354 PrintfConversionSpecifier() 355 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 356 357 PrintfConversionSpecifier(const char *pos, Kind k) 358 : ConversionSpecifier(true, pos, k) {} 359 360 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 361 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 362 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 363 bool isDoubleArg() const { return kind >= DoubleArgBeg && 364 kind <= DoubleArgBeg; } 365 unsigned getLength() const { 366 // Conversion specifiers currently only are represented by 367 // single characters, but we be flexible. 368 return 1; 369 } 370 371 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 372 return CS->isPrintfKind(); 373 } 374 }; 375 376 using analyze_format_string::ArgTypeResult; 377 using analyze_format_string::LengthModifier; 378 using analyze_format_string::OptionalAmount; 379 using analyze_format_string::OptionalFlag; 380 381 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 382 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 383 OptionalFlag IsLeftJustified; // '-' 384 OptionalFlag HasPlusPrefix; // '+' 385 OptionalFlag HasSpacePrefix; // ' ' 386 OptionalFlag HasAlternativeForm; // '#' 387 OptionalFlag HasLeadingZeroes; // '0' 388 OptionalAmount Precision; 389 public: 390 PrintfSpecifier() : 391 FormatSpecifier(/* isPrintf = */ true), 392 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 393 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 394 395 static PrintfSpecifier Parse(const char *beg, const char *end); 396 397 // Methods for incrementally constructing the PrintfSpecifier. 398 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 399 CS = cs; 400 } 401 void setHasThousandsGrouping(const char *position) { 402 HasThousandsGrouping = true; 403 HasThousandsGrouping.setPosition(position); 404 } 405 void setIsLeftJustified(const char *position) { 406 IsLeftJustified = true; 407 IsLeftJustified.setPosition(position); 408 } 409 void setHasPlusPrefix(const char *position) { 410 HasPlusPrefix = true; 411 HasPlusPrefix.setPosition(position); 412 } 413 void setHasSpacePrefix(const char *position) { 414 HasSpacePrefix = true; 415 HasSpacePrefix.setPosition(position); 416 } 417 void setHasAlternativeForm(const char *position) { 418 HasAlternativeForm = true; 419 HasAlternativeForm.setPosition(position); 420 } 421 void setHasLeadingZeros(const char *position) { 422 HasLeadingZeroes = true; 423 HasLeadingZeroes.setPosition(position); 424 } 425 void setUsesPositionalArg() { UsesPositionalArg = true; } 426 427 // Methods for querying the format specifier. 428 429 const PrintfConversionSpecifier &getConversionSpecifier() const { 430 return cast<PrintfConversionSpecifier>(CS); 431 } 432 433 void setPrecision(const OptionalAmount &Amt) { 434 Precision = Amt; 435 Precision.setUsesDotPrefix(); 436 } 437 438 const OptionalAmount &getPrecision() const { 439 return Precision; 440 } 441 442 bool consumesDataArgument() const { 443 return getConversionSpecifier().consumesDataArgument(); 444 } 445 446 /// \brief Returns the builtin type that a data argument 447 /// paired with this format specifier should have. This method 448 /// will return null if the format specifier does not have 449 /// a matching data argument or the matching argument matches 450 /// more than one type. 451 ArgTypeResult getArgType(ASTContext &Ctx) const; 452 453 const OptionalFlag &hasThousandsGrouping() const { 454 return HasThousandsGrouping; 455 } 456 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 457 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 458 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 459 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 460 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 461 bool usesPositionalArg() const { return UsesPositionalArg; } 462 463 /// Changes the specifier and length according to a QualType, retaining any 464 /// flags or options. Returns true on success, or false when a conversion 465 /// was not successful. 466 bool fixType(QualType QT); 467 468 void toString(llvm::raw_ostream &os) const; 469 470 // Validation methods - to check if any element results in undefined behavior 471 bool hasValidPlusPrefix() const; 472 bool hasValidAlternativeForm() const; 473 bool hasValidLeadingZeros() const; 474 bool hasValidSpacePrefix() const; 475 bool hasValidLeftJustified() const; 476 bool hasValidThousandsGroupingPrefix() const; 477 478 bool hasValidPrecision() const; 479 bool hasValidFieldWidth() const; 480 }; 481 } // end analyze_printf namespace 482 483 //===----------------------------------------------------------------------===// 484 /// Pieces specific to fscanf format strings. 485 486 namespace analyze_scanf { 487 488 class ScanfConversionSpecifier : 489 public analyze_format_string::ConversionSpecifier { 490 public: 491 ScanfConversionSpecifier() 492 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 493 494 ScanfConversionSpecifier(const char *pos, Kind k) 495 : ConversionSpecifier(false, pos, k) {} 496 497 void setEndScanList(const char *pos) { EndScanList = pos; } 498 499 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 500 return !CS->isPrintfKind(); 501 } 502 }; 503 504 using analyze_format_string::LengthModifier; 505 using analyze_format_string::OptionalAmount; 506 using analyze_format_string::OptionalFlag; 507 508 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 509 OptionalFlag SuppressAssignment; // '*' 510 public: 511 ScanfSpecifier() : 512 FormatSpecifier(/* isPrintf = */ false), 513 SuppressAssignment("*") {} 514 515 void setSuppressAssignment(const char *position) { 516 SuppressAssignment = true; 517 SuppressAssignment.setPosition(position); 518 } 519 520 const OptionalFlag &getSuppressAssignment() const { 521 return SuppressAssignment; 522 } 523 524 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 525 CS = cs; 526 } 527 528 const ScanfConversionSpecifier &getConversionSpecifier() const { 529 return cast<ScanfConversionSpecifier>(CS); 530 } 531 532 bool consumesDataArgument() const { 533 return CS.consumesDataArgument() && !SuppressAssignment; 534 } 535 536 static ScanfSpecifier Parse(const char *beg, const char *end); 537 }; 538 539 } // end analyze_scanf namespace 540 541 //===----------------------------------------------------------------------===// 542 // Parsing and processing of format strings (both fprintf and fscanf). 543 544 namespace analyze_format_string { 545 546 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 547 548 class FormatStringHandler { 549 public: 550 FormatStringHandler() {} 551 virtual ~FormatStringHandler(); 552 553 virtual void HandleNullChar(const char *nullCharacter) {} 554 555 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 556 PositionContext p) {} 557 558 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 559 560 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 561 unsigned specifierLen) {} 562 563 // Printf-specific handlers. 564 565 virtual bool HandleInvalidPrintfConversionSpecifier( 566 const analyze_printf::PrintfSpecifier &FS, 567 const char *startSpecifier, 568 unsigned specifierLen) { 569 return true; 570 } 571 572 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 573 const char *startSpecifier, 574 unsigned specifierLen) { 575 return true; 576 } 577 578 // Scanf-specific handlers. 579 580 virtual bool HandleInvalidScanfConversionSpecifier( 581 const analyze_scanf::ScanfSpecifier &FS, 582 const char *startSpecifier, 583 unsigned specifierLen) { 584 return true; 585 } 586 587 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 588 const char *startSpecifier, 589 unsigned specifierLen) { 590 return true; 591 } 592 593 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 594 }; 595 596 bool ParsePrintfString(FormatStringHandler &H, 597 const char *beg, const char *end); 598 599 bool ParseScanfString(FormatStringHandler &H, 600 const char *beg, const char *end); 601 602 } // end analyze_format_string namespace 603 } // end clang namespace 604 #endif 605