1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Handling of format string in printf and friends. The structure of format 11 // strings for fprintf() are described in C99 7.19.6.1. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/Analyses/FormatString.h" 16 #include "FormatStringParsing.h" 17 18 using clang::analyze_format_string::ArgTypeResult; 19 using clang::analyze_format_string::FormatStringHandler; 20 using clang::analyze_format_string::LengthModifier; 21 using clang::analyze_format_string::OptionalAmount; 22 using clang::analyze_format_string::ConversionSpecifier; 23 using clang::analyze_printf::PrintfSpecifier; 24 25 using namespace clang; 26 27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 28 PrintfSpecifierResult; 29 30 //===----------------------------------------------------------------------===// 31 // Methods for parsing format strings. 32 //===----------------------------------------------------------------------===// 33 34 using analyze_format_string::ParseNonPositionAmount; 35 36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } 42 else { 43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 44 analyze_format_string::PrecisionPos); 45 if (Amt.isInvalid()) 46 return true; 47 FS.setPrecision(Amt); 48 } 49 return false; 50 } 51 52 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 53 const char *&Beg, 54 const char *E, 55 unsigned &argIndex) { 56 57 using namespace clang::analyze_format_string; 58 using namespace clang::analyze_printf; 59 60 const char *I = Beg; 61 const char *Start = 0; 62 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 63 64 // Look for a '%' character that indicates the start of a format specifier. 65 for ( ; I != E ; ++I) { 66 char c = *I; 67 if (c == '\0') { 68 // Detect spurious null characters, which are likely errors. 69 H.HandleNullChar(I); 70 return true; 71 } 72 if (c == '%') { 73 Start = I++; // Record the start of the format specifier. 74 break; 75 } 76 } 77 78 // No format specifier found? 79 if (!Start) 80 return false; 81 82 if (I == E) { 83 // No more characters left? 84 H.HandleIncompleteSpecifier(Start, E - Start); 85 return true; 86 } 87 88 PrintfSpecifier FS; 89 if (ParseArgPosition(H, FS, Start, I, E)) 90 return true; 91 92 if (I == E) { 93 // No more characters left? 94 H.HandleIncompleteSpecifier(Start, E - Start); 95 return true; 96 } 97 98 // Look for flags (if any). 99 bool hasMore = true; 100 for ( ; I != E; ++I) { 101 switch (*I) { 102 default: hasMore = false; break; 103 case '\'': 104 // FIXME: POSIX specific. Always accept? 105 FS.setHasThousandsGrouping(I); 106 break; 107 case '-': FS.setIsLeftJustified(I); break; 108 case '+': FS.setHasPlusPrefix(I); break; 109 case ' ': FS.setHasSpacePrefix(I); break; 110 case '#': FS.setHasAlternativeForm(I); break; 111 case '0': FS.setHasLeadingZeros(I); break; 112 } 113 if (!hasMore) 114 break; 115 } 116 117 if (I == E) { 118 // No more characters left? 119 H.HandleIncompleteSpecifier(Start, E - Start); 120 return true; 121 } 122 123 // Look for the field width (if any). 124 if (ParseFieldWidth(H, FS, Start, I, E, 125 FS.usesPositionalArg() ? 0 : &argIndex)) 126 return true; 127 128 if (I == E) { 129 // No more characters left? 130 H.HandleIncompleteSpecifier(Start, E - Start); 131 return true; 132 } 133 134 // Look for the precision (if any). 135 if (*I == '.') { 136 ++I; 137 if (I == E) { 138 H.HandleIncompleteSpecifier(Start, E - Start); 139 return true; 140 } 141 142 if (ParsePrecision(H, FS, Start, I, E, 143 FS.usesPositionalArg() ? 0 : &argIndex)) 144 return true; 145 146 if (I == E) { 147 // No more characters left? 148 H.HandleIncompleteSpecifier(Start, E - Start); 149 return true; 150 } 151 } 152 153 // Look for the length modifier. 154 if (ParseLengthModifier(FS, I, E) && I == E) { 155 // No more characters left? 156 H.HandleIncompleteSpecifier(Start, E - Start); 157 return true; 158 } 159 160 if (*I == '\0') { 161 // Detect spurious null characters, which are likely errors. 162 H.HandleNullChar(I); 163 return true; 164 } 165 166 // Finally, look for the conversion specifier. 167 const char *conversionPosition = I++; 168 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 169 switch (*conversionPosition) { 170 default: 171 break; 172 // C99: 7.19.6.1 (section 8). 173 case '%': k = ConversionSpecifier::PercentArg; break; 174 case 'A': k = ConversionSpecifier::AArg; break; 175 case 'E': k = ConversionSpecifier::EArg; break; 176 case 'F': k = ConversionSpecifier::FArg; break; 177 case 'G': k = ConversionSpecifier::GArg; break; 178 case 'X': k = ConversionSpecifier::XArg; break; 179 case 'a': k = ConversionSpecifier::aArg; break; 180 case 'c': k = ConversionSpecifier::cArg; break; 181 case 'd': k = ConversionSpecifier::dArg; break; 182 case 'e': k = ConversionSpecifier::eArg; break; 183 case 'f': k = ConversionSpecifier::fArg; break; 184 case 'g': k = ConversionSpecifier::gArg; break; 185 case 'i': k = ConversionSpecifier::iArg; break; 186 case 'n': k = ConversionSpecifier::nArg; break; 187 case 'o': k = ConversionSpecifier::oArg; break; 188 case 'p': k = ConversionSpecifier::pArg; break; 189 case 's': k = ConversionSpecifier::sArg; break; 190 case 'u': k = ConversionSpecifier::uArg; break; 191 case 'x': k = ConversionSpecifier::xArg; break; 192 // POSIX specific. 193 case 'C': k = ConversionSpecifier::CArg; break; 194 case 'S': k = ConversionSpecifier::SArg; break; 195 // Objective-C. 196 case '@': k = ConversionSpecifier::ObjCObjArg; break; 197 // Glibc specific. 198 case 'm': k = ConversionSpecifier::PrintErrno; break; 199 } 200 PrintfConversionSpecifier CS(conversionPosition, k); 201 FS.setConversionSpecifier(CS); 202 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 203 FS.setArgIndex(argIndex++); 204 205 if (k == ConversionSpecifier::InvalidSpecifier) { 206 // Assume the conversion takes one argument. 207 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); 208 } 209 return PrintfSpecifierResult(Start, FS); 210 } 211 212 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 213 const char *I, 214 const char *E) { 215 216 unsigned argIndex = 0; 217 218 // Keep looking for a format specifier until we have exhausted the string. 219 while (I != E) { 220 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex); 221 // Did a fail-stop error of any kind occur when parsing the specifier? 222 // If so, don't do any more processing. 223 if (FSR.shouldStop()) 224 return true;; 225 // Did we exhaust the string or encounter an error that 226 // we can recover from? 227 if (!FSR.hasValue()) 228 continue; 229 // We have a format specifier. Pass it to the callback. 230 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 231 I - FSR.getStart())) 232 return true; 233 } 234 assert(I == E && "Format string not exhausted"); 235 return false; 236 } 237 238 //===----------------------------------------------------------------------===// 239 // Methods on ConversionSpecifier. 240 //===----------------------------------------------------------------------===// 241 const char *ConversionSpecifier::toString() const { 242 switch (kind) { 243 case dArg: return "d"; 244 case iArg: return "i"; 245 case oArg: return "o"; 246 case uArg: return "u"; 247 case xArg: return "x"; 248 case XArg: return "X"; 249 case fArg: return "f"; 250 case FArg: return "F"; 251 case eArg: return "e"; 252 case EArg: return "E"; 253 case gArg: return "g"; 254 case GArg: return "G"; 255 case aArg: return "a"; 256 case AArg: return "A"; 257 case cArg: return "c"; 258 case sArg: return "s"; 259 case pArg: return "p"; 260 case nArg: return "n"; 261 case PercentArg: return "%"; 262 case ScanListArg: return "["; 263 case InvalidSpecifier: return NULL; 264 265 // MacOS X unicode extensions. 266 case CArg: return "C"; 267 case SArg: return "S"; 268 269 // Objective-C specific specifiers. 270 case ObjCObjArg: return "@"; 271 272 // GlibC specific specifiers. 273 case PrintErrno: return "m"; 274 } 275 return NULL; 276 } 277 278 //===----------------------------------------------------------------------===// 279 // Methods on PrintfSpecifier. 280 //===----------------------------------------------------------------------===// 281 282 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 283 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 284 285 if (!CS.consumesDataArgument()) 286 return ArgTypeResult::Invalid(); 287 288 if (CS.getKind() == ConversionSpecifier::cArg) 289 switch (LM.getKind()) { 290 case LengthModifier::None: return Ctx.IntTy; 291 case LengthModifier::AsLong: return ArgTypeResult::WIntTy; 292 default: 293 return ArgTypeResult::Invalid(); 294 } 295 296 if (CS.isIntArg()) 297 switch (LM.getKind()) { 298 case LengthModifier::AsLongDouble: 299 return ArgTypeResult::Invalid(); 300 case LengthModifier::None: return Ctx.IntTy; 301 case LengthModifier::AsChar: return Ctx.SignedCharTy; 302 case LengthModifier::AsShort: return Ctx.ShortTy; 303 case LengthModifier::AsLong: return Ctx.LongTy; 304 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 305 case LengthModifier::AsIntMax: 306 // FIXME: Return unknown for now. 307 return ArgTypeResult(); 308 case LengthModifier::AsSizeT: return Ctx.getSizeType(); 309 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType(); 310 } 311 312 if (CS.isUIntArg()) 313 switch (LM.getKind()) { 314 case LengthModifier::AsLongDouble: 315 return ArgTypeResult::Invalid(); 316 case LengthModifier::None: return Ctx.UnsignedIntTy; 317 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 318 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 319 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 320 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 321 case LengthModifier::AsIntMax: 322 // FIXME: Return unknown for now. 323 return ArgTypeResult(); 324 case LengthModifier::AsSizeT: 325 // FIXME: How to get the corresponding unsigned 326 // version of size_t? 327 return ArgTypeResult(); 328 case LengthModifier::AsPtrDiff: 329 // FIXME: How to get the corresponding unsigned 330 // version of ptrdiff_t? 331 return ArgTypeResult(); 332 } 333 334 if (CS.isDoubleArg()) { 335 if (LM.getKind() == LengthModifier::AsLongDouble) 336 return Ctx.LongDoubleTy; 337 return Ctx.DoubleTy; 338 } 339 340 switch (CS.getKind()) { 341 case ConversionSpecifier::sArg: 342 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? 343 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 344 case ConversionSpecifier::SArg: 345 // FIXME: This appears to be Mac OS X specific. 346 return ArgTypeResult::WCStrTy; 347 case ConversionSpecifier::CArg: 348 return Ctx.WCharTy; 349 case ConversionSpecifier::pArg: 350 return ArgTypeResult::CPointerTy; 351 default: 352 break; 353 } 354 355 // FIXME: Handle other cases. 356 return ArgTypeResult(); 357 } 358 359 bool PrintfSpecifier::fixType(QualType QT) { 360 // Handle strings first (char *, wchar_t *) 361 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 362 CS.setKind(ConversionSpecifier::sArg); 363 364 // Disable irrelevant flags 365 HasAlternativeForm = 0; 366 HasLeadingZeroes = 0; 367 368 // Set the long length modifier for wide characters 369 if (QT->getPointeeType()->isWideCharType()) 370 LM.setKind(LengthModifier::AsWideChar); 371 372 return true; 373 } 374 375 // We can only work with builtin types. 376 if (!QT->isBuiltinType()) 377 return false; 378 379 // Everything else should be a base type 380 const BuiltinType *BT = QT->getAs<BuiltinType>(); 381 382 // Set length modifier 383 switch (BT->getKind()) { 384 case BuiltinType::Bool: 385 case BuiltinType::WChar_U: 386 case BuiltinType::WChar_S: 387 case BuiltinType::Char16: 388 case BuiltinType::Char32: 389 case BuiltinType::UInt128: 390 case BuiltinType::Int128: 391 // Integral types which are non-trivial to correct. 392 return false; 393 394 case BuiltinType::Void: 395 case BuiltinType::NullPtr: 396 case BuiltinType::ObjCId: 397 case BuiltinType::ObjCClass: 398 case BuiltinType::ObjCSel: 399 case BuiltinType::Dependent: 400 case BuiltinType::Overload: 401 case BuiltinType::BoundMember: 402 case BuiltinType::UnknownAny: 403 // Misc other stuff which doesn't make sense here. 404 return false; 405 406 case BuiltinType::UInt: 407 case BuiltinType::Int: 408 case BuiltinType::Float: 409 case BuiltinType::Double: 410 LM.setKind(LengthModifier::None); 411 break; 412 413 case BuiltinType::Char_U: 414 case BuiltinType::UChar: 415 case BuiltinType::Char_S: 416 case BuiltinType::SChar: 417 LM.setKind(LengthModifier::AsChar); 418 break; 419 420 case BuiltinType::Short: 421 case BuiltinType::UShort: 422 LM.setKind(LengthModifier::AsShort); 423 break; 424 425 case BuiltinType::Long: 426 case BuiltinType::ULong: 427 LM.setKind(LengthModifier::AsLong); 428 break; 429 430 case BuiltinType::LongLong: 431 case BuiltinType::ULongLong: 432 LM.setKind(LengthModifier::AsLongLong); 433 break; 434 435 case BuiltinType::LongDouble: 436 LM.setKind(LengthModifier::AsLongDouble); 437 break; 438 } 439 440 // Set conversion specifier and disable any flags which do not apply to it. 441 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 442 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) { 443 CS.setKind(ConversionSpecifier::cArg); 444 LM.setKind(LengthModifier::None); 445 Precision.setHowSpecified(OptionalAmount::NotSpecified); 446 HasAlternativeForm = 0; 447 HasLeadingZeroes = 0; 448 HasPlusPrefix = 0; 449 } 450 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 451 else if (QT->isRealFloatingType()) { 452 CS.setKind(ConversionSpecifier::fArg); 453 } 454 else if (QT->isSignedIntegerType()) { 455 CS.setKind(ConversionSpecifier::dArg); 456 HasAlternativeForm = 0; 457 } 458 else if (QT->isUnsignedIntegerType()) { 459 // Preserve the original formatting, e.g. 'X', 'o'. 460 if (!cast<PrintfConversionSpecifier>(CS).isUIntArg()) 461 CS.setKind(ConversionSpecifier::uArg); 462 HasAlternativeForm = 0; 463 HasPlusPrefix = 0; 464 } 465 else { 466 assert(0 && "Unexpected type"); 467 } 468 469 return true; 470 } 471 472 void PrintfSpecifier::toString(llvm::raw_ostream &os) const { 473 // Whilst some features have no defined order, we are using the order 474 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 475 os << "%"; 476 477 // Positional args 478 if (usesPositionalArg()) { 479 os << getPositionalArgIndex() << "$"; 480 } 481 482 // Conversion flags 483 if (IsLeftJustified) os << "-"; 484 if (HasPlusPrefix) os << "+"; 485 if (HasSpacePrefix) os << " "; 486 if (HasAlternativeForm) os << "#"; 487 if (HasLeadingZeroes) os << "0"; 488 489 // Minimum field width 490 FieldWidth.toString(os); 491 // Precision 492 Precision.toString(os); 493 // Length modifier 494 os << LM.toString(); 495 // Conversion specifier 496 os << CS.toString(); 497 } 498 499 bool PrintfSpecifier::hasValidPlusPrefix() const { 500 if (!HasPlusPrefix) 501 return true; 502 503 // The plus prefix only makes sense for signed conversions 504 switch (CS.getKind()) { 505 case ConversionSpecifier::dArg: 506 case ConversionSpecifier::iArg: 507 case ConversionSpecifier::fArg: 508 case ConversionSpecifier::FArg: 509 case ConversionSpecifier::eArg: 510 case ConversionSpecifier::EArg: 511 case ConversionSpecifier::gArg: 512 case ConversionSpecifier::GArg: 513 case ConversionSpecifier::aArg: 514 case ConversionSpecifier::AArg: 515 return true; 516 517 default: 518 return false; 519 } 520 } 521 522 bool PrintfSpecifier::hasValidAlternativeForm() const { 523 if (!HasAlternativeForm) 524 return true; 525 526 // Alternate form flag only valid with the oxXaAeEfFgG conversions 527 switch (CS.getKind()) { 528 case ConversionSpecifier::oArg: 529 case ConversionSpecifier::xArg: 530 case ConversionSpecifier::XArg: 531 case ConversionSpecifier::aArg: 532 case ConversionSpecifier::AArg: 533 case ConversionSpecifier::eArg: 534 case ConversionSpecifier::EArg: 535 case ConversionSpecifier::fArg: 536 case ConversionSpecifier::FArg: 537 case ConversionSpecifier::gArg: 538 case ConversionSpecifier::GArg: 539 return true; 540 541 default: 542 return false; 543 } 544 } 545 546 bool PrintfSpecifier::hasValidLeadingZeros() const { 547 if (!HasLeadingZeroes) 548 return true; 549 550 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 551 switch (CS.getKind()) { 552 case ConversionSpecifier::dArg: 553 case ConversionSpecifier::iArg: 554 case ConversionSpecifier::oArg: 555 case ConversionSpecifier::uArg: 556 case ConversionSpecifier::xArg: 557 case ConversionSpecifier::XArg: 558 case ConversionSpecifier::aArg: 559 case ConversionSpecifier::AArg: 560 case ConversionSpecifier::eArg: 561 case ConversionSpecifier::EArg: 562 case ConversionSpecifier::fArg: 563 case ConversionSpecifier::FArg: 564 case ConversionSpecifier::gArg: 565 case ConversionSpecifier::GArg: 566 return true; 567 568 default: 569 return false; 570 } 571 } 572 573 bool PrintfSpecifier::hasValidSpacePrefix() const { 574 if (!HasSpacePrefix) 575 return true; 576 577 // The space prefix only makes sense for signed conversions 578 switch (CS.getKind()) { 579 case ConversionSpecifier::dArg: 580 case ConversionSpecifier::iArg: 581 case ConversionSpecifier::fArg: 582 case ConversionSpecifier::FArg: 583 case ConversionSpecifier::eArg: 584 case ConversionSpecifier::EArg: 585 case ConversionSpecifier::gArg: 586 case ConversionSpecifier::GArg: 587 case ConversionSpecifier::aArg: 588 case ConversionSpecifier::AArg: 589 return true; 590 591 default: 592 return false; 593 } 594 } 595 596 bool PrintfSpecifier::hasValidLeftJustified() const { 597 if (!IsLeftJustified) 598 return true; 599 600 // The left justified flag is valid for all conversions except n 601 switch (CS.getKind()) { 602 case ConversionSpecifier::nArg: 603 return false; 604 605 default: 606 return true; 607 } 608 } 609 610 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 611 if (!HasThousandsGrouping) 612 return true; 613 614 switch (CS.getKind()) { 615 case ConversionSpecifier::dArg: 616 case ConversionSpecifier::iArg: 617 case ConversionSpecifier::uArg: 618 case ConversionSpecifier::fArg: 619 case ConversionSpecifier::FArg: 620 case ConversionSpecifier::gArg: 621 case ConversionSpecifier::GArg: 622 return true; 623 default: 624 return false; 625 } 626 } 627 628 bool PrintfSpecifier::hasValidPrecision() const { 629 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 630 return true; 631 632 // Precision is only valid with the diouxXaAeEfFgGs conversions 633 switch (CS.getKind()) { 634 case ConversionSpecifier::dArg: 635 case ConversionSpecifier::iArg: 636 case ConversionSpecifier::oArg: 637 case ConversionSpecifier::uArg: 638 case ConversionSpecifier::xArg: 639 case ConversionSpecifier::XArg: 640 case ConversionSpecifier::aArg: 641 case ConversionSpecifier::AArg: 642 case ConversionSpecifier::eArg: 643 case ConversionSpecifier::EArg: 644 case ConversionSpecifier::fArg: 645 case ConversionSpecifier::FArg: 646 case ConversionSpecifier::gArg: 647 case ConversionSpecifier::GArg: 648 case ConversionSpecifier::sArg: 649 return true; 650 651 default: 652 return false; 653 } 654 } 655 bool PrintfSpecifier::hasValidFieldWidth() const { 656 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 657 return true; 658 659 // The field width is valid for all conversions except n 660 switch (CS.getKind()) { 661 case ConversionSpecifier::nArg: 662 return false; 663 664 default: 665 return true; 666 } 667 } 668