1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Handling of format string in printf and friends. The structure of format 11 // strings for fprintf() are described in C99 7.19.6.1. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/Analyses/FormatString.h" 16 #include "FormatStringParsing.h" 17 18 using clang::analyze_format_string::ArgTypeResult; 19 using clang::analyze_format_string::FormatStringHandler; 20 using clang::analyze_format_string::LengthModifier; 21 using clang::analyze_format_string::OptionalAmount; 22 using clang::analyze_format_string::ConversionSpecifier; 23 using clang::analyze_printf::PrintfSpecifier; 24 25 using namespace clang; 26 27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 28 PrintfSpecifierResult; 29 30 //===----------------------------------------------------------------------===// 31 // Methods for parsing format strings. 32 //===----------------------------------------------------------------------===// 33 34 using analyze_format_string::ParseNonPositionAmount; 35 36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } else { 42 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 43 analyze_format_string::PrecisionPos); 44 if (Amt.isInvalid()) 45 return true; 46 FS.setPrecision(Amt); 47 } 48 return false; 49 } 50 51 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 52 const char *&Beg, 53 const char *E, 54 unsigned &argIndex) { 55 56 using namespace clang::analyze_format_string; 57 using namespace clang::analyze_printf; 58 59 const char *I = Beg; 60 const char *Start = 0; 61 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 62 63 // Look for a '%' character that indicates the start of a format specifier. 64 for ( ; I != E ; ++I) { 65 char c = *I; 66 if (c == '\0') { 67 // Detect spurious null characters, which are likely errors. 68 H.HandleNullChar(I); 69 return true; 70 } 71 if (c == '%') { 72 Start = I++; // Record the start of the format specifier. 73 break; 74 } 75 } 76 77 // No format specifier found? 78 if (!Start) 79 return false; 80 81 if (I == E) { 82 // No more characters left? 83 H.HandleIncompleteSpecifier(Start, E - Start); 84 return true; 85 } 86 87 PrintfSpecifier FS; 88 if (ParseArgPosition(H, FS, Start, I, E)) 89 return true; 90 91 if (I == E) { 92 // No more characters left? 93 H.HandleIncompleteSpecifier(Start, E - Start); 94 return true; 95 } 96 97 // Look for flags (if any). 98 bool hasMore = true; 99 for ( ; I != E; ++I) { 100 switch (*I) { 101 default: hasMore = false; break; 102 case '\'': 103 // FIXME: POSIX specific. Always accept? 104 FS.setHasThousandsGrouping(I); 105 break; 106 case '-': FS.setIsLeftJustified(I); break; 107 case '+': FS.setHasPlusPrefix(I); break; 108 case ' ': FS.setHasSpacePrefix(I); break; 109 case '#': FS.setHasAlternativeForm(I); break; 110 case '0': FS.setHasLeadingZeros(I); break; 111 } 112 if (!hasMore) 113 break; 114 } 115 116 if (I == E) { 117 // No more characters left? 118 H.HandleIncompleteSpecifier(Start, E - Start); 119 return true; 120 } 121 122 // Look for the field width (if any). 123 if (ParseFieldWidth(H, FS, Start, I, E, 124 FS.usesPositionalArg() ? 0 : &argIndex)) 125 return true; 126 127 if (I == E) { 128 // No more characters left? 129 H.HandleIncompleteSpecifier(Start, E - Start); 130 return true; 131 } 132 133 // Look for the precision (if any). 134 if (*I == '.') { 135 ++I; 136 if (I == E) { 137 H.HandleIncompleteSpecifier(Start, E - Start); 138 return true; 139 } 140 141 if (ParsePrecision(H, FS, Start, I, E, 142 FS.usesPositionalArg() ? 0 : &argIndex)) 143 return true; 144 145 if (I == E) { 146 // No more characters left? 147 H.HandleIncompleteSpecifier(Start, E - Start); 148 return true; 149 } 150 } 151 152 // Look for the length modifier. 153 if (ParseLengthModifier(FS, I, E) && I == E) { 154 // No more characters left? 155 H.HandleIncompleteSpecifier(Start, E - Start); 156 return true; 157 } 158 159 if (*I == '\0') { 160 // Detect spurious null characters, which are likely errors. 161 H.HandleNullChar(I); 162 return true; 163 } 164 165 // Finally, look for the conversion specifier. 166 const char *conversionPosition = I++; 167 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 168 switch (*conversionPosition) { 169 default: 170 break; 171 // C99: 7.19.6.1 (section 8). 172 case '%': k = ConversionSpecifier::PercentArg; break; 173 case 'A': k = ConversionSpecifier::AArg; break; 174 case 'E': k = ConversionSpecifier::EArg; break; 175 case 'F': k = ConversionSpecifier::FArg; break; 176 case 'G': k = ConversionSpecifier::GArg; break; 177 case 'X': k = ConversionSpecifier::XArg; break; 178 case 'a': k = ConversionSpecifier::aArg; break; 179 case 'c': k = ConversionSpecifier::cArg; break; 180 case 'd': k = ConversionSpecifier::dArg; break; 181 case 'e': k = ConversionSpecifier::eArg; break; 182 case 'f': k = ConversionSpecifier::fArg; break; 183 case 'g': k = ConversionSpecifier::gArg; break; 184 case 'i': k = ConversionSpecifier::iArg; break; 185 case 'n': k = ConversionSpecifier::nArg; break; 186 case 'o': k = ConversionSpecifier::oArg; break; 187 case 'p': k = ConversionSpecifier::pArg; break; 188 case 's': k = ConversionSpecifier::sArg; break; 189 case 'u': k = ConversionSpecifier::uArg; break; 190 case 'x': k = ConversionSpecifier::xArg; break; 191 // POSIX specific. 192 case 'C': k = ConversionSpecifier::CArg; break; 193 case 'S': k = ConversionSpecifier::SArg; break; 194 // Objective-C. 195 case '@': k = ConversionSpecifier::ObjCObjArg; break; 196 // Glibc specific. 197 case 'm': k = ConversionSpecifier::PrintErrno; break; 198 } 199 PrintfConversionSpecifier CS(conversionPosition, k); 200 FS.setConversionSpecifier(CS); 201 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 202 FS.setArgIndex(argIndex++); 203 204 if (k == ConversionSpecifier::InvalidSpecifier) { 205 // Assume the conversion takes one argument. 206 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); 207 } 208 return PrintfSpecifierResult(Start, FS); 209 } 210 211 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 212 const char *I, 213 const char *E) { 214 215 unsigned argIndex = 0; 216 217 // Keep looking for a format specifier until we have exhausted the string. 218 while (I != E) { 219 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex); 220 // Did a fail-stop error of any kind occur when parsing the specifier? 221 // If so, don't do any more processing. 222 if (FSR.shouldStop()) 223 return true;; 224 // Did we exhaust the string or encounter an error that 225 // we can recover from? 226 if (!FSR.hasValue()) 227 continue; 228 // We have a format specifier. Pass it to the callback. 229 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 230 I - FSR.getStart())) 231 return true; 232 } 233 assert(I == E && "Format string not exhausted"); 234 return false; 235 } 236 237 //===----------------------------------------------------------------------===// 238 // Methods on ConversionSpecifier. 239 //===----------------------------------------------------------------------===// 240 const char *ConversionSpecifier::toString() const { 241 switch (kind) { 242 case dArg: return "d"; 243 case iArg: return "i"; 244 case oArg: return "o"; 245 case uArg: return "u"; 246 case xArg: return "x"; 247 case XArg: return "X"; 248 case fArg: return "f"; 249 case FArg: return "F"; 250 case eArg: return "e"; 251 case EArg: return "E"; 252 case gArg: return "g"; 253 case GArg: return "G"; 254 case aArg: return "a"; 255 case AArg: return "A"; 256 case cArg: return "c"; 257 case sArg: return "s"; 258 case pArg: return "p"; 259 case nArg: return "n"; 260 case PercentArg: return "%"; 261 case ScanListArg: return "["; 262 case InvalidSpecifier: return NULL; 263 264 // MacOS X unicode extensions. 265 case CArg: return "C"; 266 case SArg: return "S"; 267 268 // Objective-C specific specifiers. 269 case ObjCObjArg: return "@"; 270 271 // GlibC specific specifiers. 272 case PrintErrno: return "m"; 273 } 274 return NULL; 275 } 276 277 //===----------------------------------------------------------------------===// 278 // Methods on PrintfSpecifier. 279 //===----------------------------------------------------------------------===// 280 281 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 282 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 283 284 if (!CS.consumesDataArgument()) 285 return ArgTypeResult::Invalid(); 286 287 if (CS.getKind() == ConversionSpecifier::cArg) 288 switch (LM.getKind()) { 289 case LengthModifier::None: return Ctx.IntTy; 290 case LengthModifier::AsLong: return ArgTypeResult::WIntTy; 291 default: 292 return ArgTypeResult::Invalid(); 293 } 294 295 if (CS.isIntArg()) 296 switch (LM.getKind()) { 297 case LengthModifier::AsLongDouble: 298 return ArgTypeResult::Invalid(); 299 case LengthModifier::None: return Ctx.IntTy; 300 case LengthModifier::AsChar: return Ctx.SignedCharTy; 301 case LengthModifier::AsShort: return Ctx.ShortTy; 302 case LengthModifier::AsLong: return Ctx.LongTy; 303 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 304 case LengthModifier::AsIntMax: 305 // FIXME: Return unknown for now. 306 return ArgTypeResult(); 307 case LengthModifier::AsSizeT: return Ctx.getSizeType(); 308 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType(); 309 } 310 311 if (CS.isUIntArg()) 312 switch (LM.getKind()) { 313 case LengthModifier::AsLongDouble: 314 return ArgTypeResult::Invalid(); 315 case LengthModifier::None: return Ctx.UnsignedIntTy; 316 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 317 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 318 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 319 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 320 case LengthModifier::AsIntMax: 321 // FIXME: Return unknown for now. 322 return ArgTypeResult(); 323 case LengthModifier::AsSizeT: 324 // FIXME: How to get the corresponding unsigned 325 // version of size_t? 326 return ArgTypeResult(); 327 case LengthModifier::AsPtrDiff: 328 // FIXME: How to get the corresponding unsigned 329 // version of ptrdiff_t? 330 return ArgTypeResult(); 331 } 332 333 if (CS.isDoubleArg()) { 334 if (LM.getKind() == LengthModifier::AsLongDouble) 335 return Ctx.LongDoubleTy; 336 return Ctx.DoubleTy; 337 } 338 339 switch (CS.getKind()) { 340 case ConversionSpecifier::sArg: 341 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? 342 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 343 case ConversionSpecifier::SArg: 344 // FIXME: This appears to be Mac OS X specific. 345 return ArgTypeResult::WCStrTy; 346 case ConversionSpecifier::CArg: 347 return Ctx.WCharTy; 348 case ConversionSpecifier::pArg: 349 return ArgTypeResult::CPointerTy; 350 default: 351 break; 352 } 353 354 // FIXME: Handle other cases. 355 return ArgTypeResult(); 356 } 357 358 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt) { 359 // Handle strings first (char *, wchar_t *) 360 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 361 CS.setKind(ConversionSpecifier::sArg); 362 363 // Disable irrelevant flags 364 HasAlternativeForm = 0; 365 HasLeadingZeroes = 0; 366 367 // Set the long length modifier for wide characters 368 if (QT->getPointeeType()->isWideCharType()) 369 LM.setKind(LengthModifier::AsWideChar); 370 371 return true; 372 } 373 374 // We can only work with builtin types. 375 const BuiltinType *BT = QT->getAs<BuiltinType>(); 376 if (!BT) 377 return false; 378 379 // Set length modifier 380 switch (BT->getKind()) { 381 case BuiltinType::Bool: 382 case BuiltinType::WChar_U: 383 case BuiltinType::WChar_S: 384 case BuiltinType::Char16: 385 case BuiltinType::Char32: 386 case BuiltinType::UInt128: 387 case BuiltinType::Int128: 388 case BuiltinType::Half: 389 // Various types which are non-trivial to correct. 390 return false; 391 392 #define SIGNED_TYPE(Id, SingletonId) 393 #define UNSIGNED_TYPE(Id, SingletonId) 394 #define FLOATING_TYPE(Id, SingletonId) 395 #define BUILTIN_TYPE(Id, SingletonId) \ 396 case BuiltinType::Id: 397 #include "clang/AST/BuiltinTypes.def" 398 // Misc other stuff which doesn't make sense here. 399 return false; 400 401 case BuiltinType::UInt: 402 case BuiltinType::Int: 403 case BuiltinType::Float: 404 case BuiltinType::Double: 405 LM.setKind(LengthModifier::None); 406 break; 407 408 case BuiltinType::Char_U: 409 case BuiltinType::UChar: 410 case BuiltinType::Char_S: 411 case BuiltinType::SChar: 412 LM.setKind(LengthModifier::AsChar); 413 break; 414 415 case BuiltinType::Short: 416 case BuiltinType::UShort: 417 LM.setKind(LengthModifier::AsShort); 418 break; 419 420 case BuiltinType::Long: 421 case BuiltinType::ULong: 422 LM.setKind(LengthModifier::AsLong); 423 break; 424 425 case BuiltinType::LongLong: 426 case BuiltinType::ULongLong: 427 LM.setKind(LengthModifier::AsLongLong); 428 break; 429 430 case BuiltinType::LongDouble: 431 LM.setKind(LengthModifier::AsLongDouble); 432 break; 433 } 434 435 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 436 if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { 437 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); 438 if (Identifier->getName() == "size_t") { 439 LM.setKind(LengthModifier::AsSizeT); 440 } else if (Identifier->getName() == "ssize_t") { 441 // Not C99, but common in Unix. 442 LM.setKind(LengthModifier::AsSizeT); 443 } else if (Identifier->getName() == "intmax_t") { 444 LM.setKind(LengthModifier::AsIntMax); 445 } else if (Identifier->getName() == "uintmax_t") { 446 LM.setKind(LengthModifier::AsIntMax); 447 } else if (Identifier->getName() == "ptrdiff_t") { 448 LM.setKind(LengthModifier::AsPtrDiff); 449 } 450 } 451 452 // Set conversion specifier and disable any flags which do not apply to it. 453 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 454 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) { 455 CS.setKind(ConversionSpecifier::cArg); 456 LM.setKind(LengthModifier::None); 457 Precision.setHowSpecified(OptionalAmount::NotSpecified); 458 HasAlternativeForm = 0; 459 HasLeadingZeroes = 0; 460 HasPlusPrefix = 0; 461 } 462 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 463 else if (QT->isRealFloatingType()) { 464 CS.setKind(ConversionSpecifier::fArg); 465 } 466 else if (QT->isSignedIntegerType()) { 467 CS.setKind(ConversionSpecifier::dArg); 468 HasAlternativeForm = 0; 469 } 470 else if (QT->isUnsignedIntegerType()) { 471 // Preserve the original formatting, e.g. 'X', 'o'. 472 if (!cast<PrintfConversionSpecifier>(CS).isUIntArg()) 473 CS.setKind(ConversionSpecifier::uArg); 474 HasAlternativeForm = 0; 475 HasPlusPrefix = 0; 476 } else { 477 llvm_unreachable("Unexpected type"); 478 } 479 480 return true; 481 } 482 483 void PrintfSpecifier::toString(raw_ostream &os) const { 484 // Whilst some features have no defined order, we are using the order 485 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 486 os << "%"; 487 488 // Positional args 489 if (usesPositionalArg()) { 490 os << getPositionalArgIndex() << "$"; 491 } 492 493 // Conversion flags 494 if (IsLeftJustified) os << "-"; 495 if (HasPlusPrefix) os << "+"; 496 if (HasSpacePrefix) os << " "; 497 if (HasAlternativeForm) os << "#"; 498 if (HasLeadingZeroes) os << "0"; 499 500 // Minimum field width 501 FieldWidth.toString(os); 502 // Precision 503 Precision.toString(os); 504 // Length modifier 505 os << LM.toString(); 506 // Conversion specifier 507 os << CS.toString(); 508 } 509 510 bool PrintfSpecifier::hasValidPlusPrefix() const { 511 if (!HasPlusPrefix) 512 return true; 513 514 // The plus prefix only makes sense for signed conversions 515 switch (CS.getKind()) { 516 case ConversionSpecifier::dArg: 517 case ConversionSpecifier::iArg: 518 case ConversionSpecifier::fArg: 519 case ConversionSpecifier::FArg: 520 case ConversionSpecifier::eArg: 521 case ConversionSpecifier::EArg: 522 case ConversionSpecifier::gArg: 523 case ConversionSpecifier::GArg: 524 case ConversionSpecifier::aArg: 525 case ConversionSpecifier::AArg: 526 return true; 527 528 default: 529 return false; 530 } 531 } 532 533 bool PrintfSpecifier::hasValidAlternativeForm() const { 534 if (!HasAlternativeForm) 535 return true; 536 537 // Alternate form flag only valid with the oxXaAeEfFgG conversions 538 switch (CS.getKind()) { 539 case ConversionSpecifier::oArg: 540 case ConversionSpecifier::xArg: 541 case ConversionSpecifier::XArg: 542 case ConversionSpecifier::aArg: 543 case ConversionSpecifier::AArg: 544 case ConversionSpecifier::eArg: 545 case ConversionSpecifier::EArg: 546 case ConversionSpecifier::fArg: 547 case ConversionSpecifier::FArg: 548 case ConversionSpecifier::gArg: 549 case ConversionSpecifier::GArg: 550 return true; 551 552 default: 553 return false; 554 } 555 } 556 557 bool PrintfSpecifier::hasValidLeadingZeros() const { 558 if (!HasLeadingZeroes) 559 return true; 560 561 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 562 switch (CS.getKind()) { 563 case ConversionSpecifier::dArg: 564 case ConversionSpecifier::iArg: 565 case ConversionSpecifier::oArg: 566 case ConversionSpecifier::uArg: 567 case ConversionSpecifier::xArg: 568 case ConversionSpecifier::XArg: 569 case ConversionSpecifier::aArg: 570 case ConversionSpecifier::AArg: 571 case ConversionSpecifier::eArg: 572 case ConversionSpecifier::EArg: 573 case ConversionSpecifier::fArg: 574 case ConversionSpecifier::FArg: 575 case ConversionSpecifier::gArg: 576 case ConversionSpecifier::GArg: 577 return true; 578 579 default: 580 return false; 581 } 582 } 583 584 bool PrintfSpecifier::hasValidSpacePrefix() const { 585 if (!HasSpacePrefix) 586 return true; 587 588 // The space prefix only makes sense for signed conversions 589 switch (CS.getKind()) { 590 case ConversionSpecifier::dArg: 591 case ConversionSpecifier::iArg: 592 case ConversionSpecifier::fArg: 593 case ConversionSpecifier::FArg: 594 case ConversionSpecifier::eArg: 595 case ConversionSpecifier::EArg: 596 case ConversionSpecifier::gArg: 597 case ConversionSpecifier::GArg: 598 case ConversionSpecifier::aArg: 599 case ConversionSpecifier::AArg: 600 return true; 601 602 default: 603 return false; 604 } 605 } 606 607 bool PrintfSpecifier::hasValidLeftJustified() const { 608 if (!IsLeftJustified) 609 return true; 610 611 // The left justified flag is valid for all conversions except n 612 switch (CS.getKind()) { 613 case ConversionSpecifier::nArg: 614 return false; 615 616 default: 617 return true; 618 } 619 } 620 621 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 622 if (!HasThousandsGrouping) 623 return true; 624 625 switch (CS.getKind()) { 626 case ConversionSpecifier::dArg: 627 case ConversionSpecifier::iArg: 628 case ConversionSpecifier::uArg: 629 case ConversionSpecifier::fArg: 630 case ConversionSpecifier::FArg: 631 case ConversionSpecifier::gArg: 632 case ConversionSpecifier::GArg: 633 return true; 634 default: 635 return false; 636 } 637 } 638 639 bool PrintfSpecifier::hasValidPrecision() const { 640 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 641 return true; 642 643 // Precision is only valid with the diouxXaAeEfFgGs conversions 644 switch (CS.getKind()) { 645 case ConversionSpecifier::dArg: 646 case ConversionSpecifier::iArg: 647 case ConversionSpecifier::oArg: 648 case ConversionSpecifier::uArg: 649 case ConversionSpecifier::xArg: 650 case ConversionSpecifier::XArg: 651 case ConversionSpecifier::aArg: 652 case ConversionSpecifier::AArg: 653 case ConversionSpecifier::eArg: 654 case ConversionSpecifier::EArg: 655 case ConversionSpecifier::fArg: 656 case ConversionSpecifier::FArg: 657 case ConversionSpecifier::gArg: 658 case ConversionSpecifier::GArg: 659 case ConversionSpecifier::sArg: 660 return true; 661 662 default: 663 return false; 664 } 665 } 666 bool PrintfSpecifier::hasValidFieldWidth() const { 667 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 668 return true; 669 670 // The field width is valid for all conversions except n 671 switch (CS.getKind()) { 672 case ConversionSpecifier::nArg: 673 return false; 674 675 default: 676 return true; 677 } 678 } 679