// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Shared details for processing format strings of printf and scanf
// (and friends).
//
//===----------------------------------------------------------------------===//

#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"

using clang::analyze_format_string::ArgTypeResult;
using clang::analyze_format_string::FormatStringHandler;
using clang::analyze_format_string::FormatSpecifier;
using clang::analyze_format_string::LengthModifier;
using clang::analyze_format_string::OptionalAmount;
using clang::analyze_format_string::PositionContext;
using clang::analyze_format_string::ConversionSpecifier;
using namespace clang;

// Key function to FormatStringHandler.
//
// The destructor has an empty body but is deliberately defined out of line in
// this file rather than in the header.
// NOTE(review): presumably this anchors the class's vtable to this translation
// unit; confirm against the class declaration, which is not visible here.
FormatStringHandler::~FormatStringHandler() {}

//===----------------------------------------------------------------------===//
// Functions for parsing format strings components in both printf and
// scanf format strings.
33 //===----------------------------------------------------------------------===// 34 35 OptionalAmount 36 clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { 37 const char *I = Beg; 38 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 39 40 unsigned accumulator = 0; 41 bool hasDigits = false; 42 43 for ( ; I != E; ++I) { 44 char c = *I; 45 if (c >= '0' && c <= '9') { 46 hasDigits = true; 47 accumulator = (accumulator * 10) + (c - '0'); 48 continue; 49 } 50 51 if (hasDigits) 52 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, 53 false); 54 55 break; 56 } 57 58 return OptionalAmount(); 59 } 60 61 OptionalAmount 62 clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, 63 const char *E, 64 unsigned &argIndex) { 65 if (*Beg == '*') { 66 ++Beg; 67 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); 68 } 69 70 return ParseAmount(Beg, E); 71 } 72 73 OptionalAmount 74 clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, 75 const char *Start, 76 const char *&Beg, 77 const char *E, 78 PositionContext p) { 79 if (*Beg == '*') { 80 const char *I = Beg + 1; 81 const OptionalAmount &Amt = ParseAmount(I, E); 82 83 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 84 H.HandleInvalidPosition(Beg, I - Beg, p); 85 return OptionalAmount(false); 86 } 87 88 if (I == E) { 89 // No more characters left? 90 H.HandleIncompleteSpecifier(Start, E - Start); 91 return OptionalAmount(false); 92 } 93 94 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 95 96 if (*I == '$') { 97 // Handle positional arguments 98 99 // Special case: '*0$', since this is an easy mistake. 
100 if (Amt.getConstantAmount() == 0) { 101 H.HandleZeroPosition(Beg, I - Beg + 1); 102 return OptionalAmount(false); 103 } 104 105 const char *Tmp = Beg; 106 Beg = ++I; 107 108 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 109 Tmp, 0, true); 110 } 111 112 H.HandleInvalidPosition(Beg, I - Beg, p); 113 return OptionalAmount(false); 114 } 115 116 return ParseAmount(Beg, E); 117 } 118 119 120 bool 121 clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, 122 FormatSpecifier &CS, 123 const char *Start, 124 const char *&Beg, const char *E, 125 unsigned *argIndex) { 126 // FIXME: Support negative field widths. 127 if (argIndex) { 128 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 129 } 130 else { 131 const OptionalAmount Amt = 132 ParsePositionAmount(H, Start, Beg, E, 133 analyze_format_string::FieldWidthPos); 134 135 if (Amt.isInvalid()) 136 return true; 137 CS.setFieldWidth(Amt); 138 } 139 return false; 140 } 141 142 bool 143 clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, 144 FormatSpecifier &FS, 145 const char *Start, 146 const char *&Beg, 147 const char *E) { 148 const char *I = Beg; 149 150 const OptionalAmount &Amt = ParseAmount(I, E); 151 152 if (I == E) { 153 // No more characters left? 154 H.HandleIncompleteSpecifier(Start, E - Start); 155 return true; 156 } 157 158 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 159 // Warn that positional arguments are non-standard. 160 H.HandlePosition(Start, I - Start); 161 162 // Special case: '%0$', since this is an easy mistake. 163 if (Amt.getConstantAmount() == 0) { 164 H.HandleZeroPosition(Start, I - Start); 165 return true; 166 } 167 168 FS.setArgIndex(Amt.getConstantAmount() - 1); 169 FS.setUsesPositionalArg(); 170 // Update the caller's pointer if we decided to consume 171 // these characters. 
172 Beg = I; 173 return false; 174 } 175 176 return false; 177 } 178 179 bool 180 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, 181 const char *&I, 182 const char *E, 183 const LangOptions &LO, 184 bool IsScanf) { 185 LengthModifier::Kind lmKind = LengthModifier::None; 186 const char *lmPosition = I; 187 switch (*I) { 188 default: 189 return false; 190 case 'h': 191 ++I; 192 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) 193 : LengthModifier::AsShort; 194 break; 195 case 'l': 196 ++I; 197 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) 198 : LengthModifier::AsLong; 199 break; 200 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; 201 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; 202 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; 203 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; 204 case 'q': lmKind = LengthModifier::AsQuad; ++I; break; 205 case 'a': 206 if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) { 207 // For scanf in C90, look at the next character to see if this should 208 // be parsed as the GNU extension 'a' length modifier. If not, this 209 // will be parsed as a conversion specifier. 210 ++I; 211 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { 212 lmKind = LengthModifier::AsAllocate; 213 break; 214 } 215 --I; 216 } 217 return false; 218 case 'm': 219 if (IsScanf) { 220 lmKind = LengthModifier::AsMAllocate; 221 ++I; 222 break; 223 } 224 return false; 225 } 226 LengthModifier lm(lmPosition, lmKind); 227 FS.setLengthModifier(lm); 228 return true; 229 } 230 231 //===----------------------------------------------------------------------===// 232 // Methods on ArgTypeResult. 
233 //===----------------------------------------------------------------------===// 234 235 bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 236 switch (K) { 237 case InvalidTy: 238 llvm_unreachable("ArgTypeResult must be valid"); 239 240 case UnknownTy: 241 return true; 242 243 case AnyCharTy: { 244 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 245 switch (BT->getKind()) { 246 default: 247 break; 248 case BuiltinType::Char_S: 249 case BuiltinType::SChar: 250 case BuiltinType::UChar: 251 case BuiltinType::Char_U: 252 return true; 253 } 254 return false; 255 } 256 257 case SpecificTy: { 258 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 259 if (T == argTy) 260 return true; 261 // Check for "compatible types". 262 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 263 switch (BT->getKind()) { 264 default: 265 break; 266 case BuiltinType::Char_S: 267 case BuiltinType::SChar: 268 return T == C.UnsignedCharTy; 269 case BuiltinType::Char_U: 270 case BuiltinType::UChar: 271 return T == C.SignedCharTy; 272 case BuiltinType::Short: 273 return T == C.UnsignedShortTy; 274 case BuiltinType::UShort: 275 return T == C.ShortTy; 276 case BuiltinType::Int: 277 return T == C.UnsignedIntTy; 278 case BuiltinType::UInt: 279 return T == C.IntTy; 280 case BuiltinType::Long: 281 return T == C.UnsignedLongTy; 282 case BuiltinType::ULong: 283 return T == C.LongTy; 284 case BuiltinType::LongLong: 285 return T == C.UnsignedLongLongTy; 286 case BuiltinType::ULongLong: 287 return T == C.LongLongTy; 288 } 289 return false; 290 } 291 292 case CStrTy: { 293 const PointerType *PT = argTy->getAs<PointerType>(); 294 if (!PT) 295 return false; 296 QualType pointeeTy = PT->getPointeeType(); 297 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 298 switch (BT->getKind()) { 299 case BuiltinType::Void: 300 case BuiltinType::Char_U: 301 case BuiltinType::UChar: 302 case BuiltinType::Char_S: 303 case BuiltinType::SChar: 304 return true; 305 
default: 306 break; 307 } 308 309 return false; 310 } 311 312 case WCStrTy: { 313 const PointerType *PT = argTy->getAs<PointerType>(); 314 if (!PT) 315 return false; 316 QualType pointeeTy = 317 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 318 return pointeeTy == C.getWCharType(); 319 } 320 321 case WIntTy: { 322 // Instead of doing a lookup for the definition of 'wint_t' (which 323 // is defined by the system headers) instead see if wchar_t and 324 // the argument type promote to the same type. 325 QualType PromoWChar = 326 C.getWCharType()->isPromotableIntegerType() 327 ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); 328 QualType PromoArg = 329 argTy->isPromotableIntegerType() 330 ? C.getPromotedIntegerType(argTy) : argTy; 331 332 PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); 333 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); 334 335 return PromoWChar == PromoArg; 336 } 337 338 case CPointerTy: 339 return argTy->isPointerType() || argTy->isObjCObjectPointerType() || 340 argTy->isBlockPointerType() || argTy->isNullPtrType(); 341 342 case ObjCPointerTy: { 343 if (argTy->getAs<ObjCObjectPointerType>() || 344 argTy->getAs<BlockPointerType>()) 345 return true; 346 347 // Handle implicit toll-free bridging. 348 if (const PointerType *PT = argTy->getAs<PointerType>()) { 349 // Things such as CFTypeRef are really just opaque pointers 350 // to C structs representing CF types that can often be bridged 351 // to Objective-C objects. Since the compiler doesn't know which 352 // structs can be toll-free bridged, we just accept them all. 
353 QualType pointee = PT->getPointeeType(); 354 if (pointee->getAsStructureType() || pointee->isVoidType()) 355 return true; 356 } 357 return false; 358 } 359 } 360 361 llvm_unreachable("Invalid ArgTypeResult Kind!"); 362 } 363 364 QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 365 switch (K) { 366 case InvalidTy: 367 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 368 case UnknownTy: 369 return QualType(); 370 case AnyCharTy: 371 return C.CharTy; 372 case SpecificTy: 373 return T; 374 case CStrTy: 375 return C.getPointerType(C.CharTy); 376 case WCStrTy: 377 return C.getPointerType(C.getWCharType()); 378 case ObjCPointerTy: 379 return C.ObjCBuiltinIdTy; 380 case CPointerTy: 381 return C.VoidPtrTy; 382 case WIntTy: { 383 QualType WC = C.getWCharType(); 384 return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; 385 } 386 } 387 388 llvm_unreachable("Invalid ArgTypeResult Kind!"); 389 } 390 391 std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { 392 std::string S = getRepresentativeType(C).getAsString(); 393 if (Name && S != Name) 394 return std::string("'") + Name + "' (aka '" + S + "')"; 395 return std::string("'") + S + "'"; 396 } 397 398 399 //===----------------------------------------------------------------------===// 400 // Methods on OptionalAmount. 401 //===----------------------------------------------------------------------===// 402 403 ArgTypeResult 404 analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { 405 return Ctx.IntTy; 406 } 407 408 //===----------------------------------------------------------------------===// 409 // Methods on LengthModifier. 
410 //===----------------------------------------------------------------------===// 411 412 const char * 413 analyze_format_string::LengthModifier::toString() const { 414 switch (kind) { 415 case AsChar: 416 return "hh"; 417 case AsShort: 418 return "h"; 419 case AsLong: // or AsWideChar 420 return "l"; 421 case AsLongLong: 422 return "ll"; 423 case AsQuad: 424 return "q"; 425 case AsIntMax: 426 return "j"; 427 case AsSizeT: 428 return "z"; 429 case AsPtrDiff: 430 return "t"; 431 case AsLongDouble: 432 return "L"; 433 case AsAllocate: 434 return "a"; 435 case AsMAllocate: 436 return "m"; 437 case None: 438 return ""; 439 } 440 return NULL; 441 } 442 443 //===----------------------------------------------------------------------===// 444 // Methods on ConversionSpecifier. 445 //===----------------------------------------------------------------------===// 446 447 const char *ConversionSpecifier::toString() const { 448 switch (kind) { 449 case dArg: return "d"; 450 case iArg: return "i"; 451 case oArg: return "o"; 452 case uArg: return "u"; 453 case xArg: return "x"; 454 case XArg: return "X"; 455 case fArg: return "f"; 456 case FArg: return "F"; 457 case eArg: return "e"; 458 case EArg: return "E"; 459 case gArg: return "g"; 460 case GArg: return "G"; 461 case aArg: return "a"; 462 case AArg: return "A"; 463 case cArg: return "c"; 464 case sArg: return "s"; 465 case pArg: return "p"; 466 case nArg: return "n"; 467 case PercentArg: return "%"; 468 case ScanListArg: return "["; 469 case InvalidSpecifier: return NULL; 470 471 // MacOS X unicode extensions. 472 case CArg: return "C"; 473 case SArg: return "S"; 474 475 // Objective-C specific specifiers. 476 case ObjCObjArg: return "@"; 477 478 // GlibC specific specifiers. 479 case PrintErrno: return "m"; 480 } 481 return NULL; 482 } 483 484 //===----------------------------------------------------------------------===// 485 // Methods on OptionalAmount. 
486 //===----------------------------------------------------------------------===// 487 488 void OptionalAmount::toString(raw_ostream &os) const { 489 switch (hs) { 490 case Invalid: 491 case NotSpecified: 492 return; 493 case Arg: 494 if (UsesDotPrefix) 495 os << "."; 496 if (usesPositionalArg()) 497 os << "*" << getPositionalArgIndex() << "$"; 498 else 499 os << "*"; 500 break; 501 case Constant: 502 if (UsesDotPrefix) 503 os << "."; 504 os << amt; 505 break; 506 } 507 } 508 509 bool FormatSpecifier::hasValidLengthModifier() const { 510 switch (LM.getKind()) { 511 case LengthModifier::None: 512 return true; 513 514 // Handle most integer flags 515 case LengthModifier::AsChar: 516 case LengthModifier::AsShort: 517 case LengthModifier::AsLongLong: 518 case LengthModifier::AsQuad: 519 case LengthModifier::AsIntMax: 520 case LengthModifier::AsSizeT: 521 case LengthModifier::AsPtrDiff: 522 switch (CS.getKind()) { 523 case ConversionSpecifier::dArg: 524 case ConversionSpecifier::iArg: 525 case ConversionSpecifier::oArg: 526 case ConversionSpecifier::uArg: 527 case ConversionSpecifier::xArg: 528 case ConversionSpecifier::XArg: 529 case ConversionSpecifier::nArg: 530 return true; 531 default: 532 return false; 533 } 534 535 // Handle 'l' flag 536 case LengthModifier::AsLong: 537 switch (CS.getKind()) { 538 case ConversionSpecifier::dArg: 539 case ConversionSpecifier::iArg: 540 case ConversionSpecifier::oArg: 541 case ConversionSpecifier::uArg: 542 case ConversionSpecifier::xArg: 543 case ConversionSpecifier::XArg: 544 case ConversionSpecifier::aArg: 545 case ConversionSpecifier::AArg: 546 case ConversionSpecifier::fArg: 547 case ConversionSpecifier::FArg: 548 case ConversionSpecifier::eArg: 549 case ConversionSpecifier::EArg: 550 case ConversionSpecifier::gArg: 551 case ConversionSpecifier::GArg: 552 case ConversionSpecifier::nArg: 553 case ConversionSpecifier::cArg: 554 case ConversionSpecifier::sArg: 555 case ConversionSpecifier::ScanListArg: 556 return true; 557 
default: 558 return false; 559 } 560 561 case LengthModifier::AsLongDouble: 562 switch (CS.getKind()) { 563 case ConversionSpecifier::aArg: 564 case ConversionSpecifier::AArg: 565 case ConversionSpecifier::fArg: 566 case ConversionSpecifier::FArg: 567 case ConversionSpecifier::eArg: 568 case ConversionSpecifier::EArg: 569 case ConversionSpecifier::gArg: 570 case ConversionSpecifier::GArg: 571 return true; 572 // GNU extension. 573 case ConversionSpecifier::dArg: 574 case ConversionSpecifier::iArg: 575 case ConversionSpecifier::oArg: 576 case ConversionSpecifier::uArg: 577 case ConversionSpecifier::xArg: 578 case ConversionSpecifier::XArg: 579 return true; 580 default: 581 return false; 582 } 583 584 case LengthModifier::AsAllocate: 585 switch (CS.getKind()) { 586 case ConversionSpecifier::sArg: 587 case ConversionSpecifier::SArg: 588 case ConversionSpecifier::ScanListArg: 589 return true; 590 default: 591 return false; 592 } 593 594 case LengthModifier::AsMAllocate: 595 switch (CS.getKind()) { 596 case ConversionSpecifier::cArg: 597 case ConversionSpecifier::CArg: 598 case ConversionSpecifier::sArg: 599 case ConversionSpecifier::SArg: 600 case ConversionSpecifier::ScanListArg: 601 return true; 602 default: 603 return false; 604 } 605 } 606 llvm_unreachable("Invalid LengthModifier Kind!"); 607 } 608 609 bool FormatSpecifier::hasStandardLengthModifier() const { 610 switch (LM.getKind()) { 611 case LengthModifier::None: 612 case LengthModifier::AsChar: 613 case LengthModifier::AsShort: 614 case LengthModifier::AsLong: 615 case LengthModifier::AsLongLong: 616 case LengthModifier::AsIntMax: 617 case LengthModifier::AsSizeT: 618 case LengthModifier::AsPtrDiff: 619 case LengthModifier::AsLongDouble: 620 return true; 621 case LengthModifier::AsAllocate: 622 case LengthModifier::AsMAllocate: 623 case LengthModifier::AsQuad: 624 return false; 625 } 626 llvm_unreachable("Invalid LengthModifier Kind!"); 627 } 628 629 bool FormatSpecifier::hasStandardConversionSpecifier(const 
LangOptions &LangOpt) const { 630 switch (CS.getKind()) { 631 case ConversionSpecifier::cArg: 632 case ConversionSpecifier::dArg: 633 case ConversionSpecifier::iArg: 634 case ConversionSpecifier::oArg: 635 case ConversionSpecifier::uArg: 636 case ConversionSpecifier::xArg: 637 case ConversionSpecifier::XArg: 638 case ConversionSpecifier::fArg: 639 case ConversionSpecifier::FArg: 640 case ConversionSpecifier::eArg: 641 case ConversionSpecifier::EArg: 642 case ConversionSpecifier::gArg: 643 case ConversionSpecifier::GArg: 644 case ConversionSpecifier::aArg: 645 case ConversionSpecifier::AArg: 646 case ConversionSpecifier::sArg: 647 case ConversionSpecifier::pArg: 648 case ConversionSpecifier::nArg: 649 case ConversionSpecifier::ObjCObjArg: 650 case ConversionSpecifier::ScanListArg: 651 case ConversionSpecifier::PercentArg: 652 return true; 653 case ConversionSpecifier::CArg: 654 case ConversionSpecifier::SArg: 655 return LangOpt.ObjC1 || LangOpt.ObjC2; 656 case ConversionSpecifier::InvalidSpecifier: 657 case ConversionSpecifier::PrintErrno: 658 return false; 659 } 660 llvm_unreachable("Invalid ConversionSpecifier Kind!"); 661 } 662 663 bool FormatSpecifier::hasStandardLengthConversionCombination() const { 664 if (LM.getKind() == LengthModifier::AsLongDouble) { 665 switch(CS.getKind()) { 666 case ConversionSpecifier::dArg: 667 case ConversionSpecifier::iArg: 668 case ConversionSpecifier::oArg: 669 case ConversionSpecifier::uArg: 670 case ConversionSpecifier::xArg: 671 case ConversionSpecifier::XArg: 672 return false; 673 default: 674 return true; 675 } 676 } 677 return true; 678 } 679