1 //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Handling of format string in scanf and friends. The structure of format 11 // strings for fscanf() are described in C99 7.19.6.2. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/Analyses/FormatString.h" 16 #include "FormatStringParsing.h" 17 18 using clang::analyze_format_string::ArgTypeResult; 19 using clang::analyze_format_string::FormatStringHandler; 20 using clang::analyze_format_string::LengthModifier; 21 using clang::analyze_format_string::OptionalAmount; 22 using clang::analyze_format_string::ConversionSpecifier; 23 using clang::analyze_scanf::ScanfArgTypeResult; 24 using clang::analyze_scanf::ScanfConversionSpecifier; 25 using clang::analyze_scanf::ScanfSpecifier; 26 using clang::UpdateOnReturn; 27 using namespace clang; 28 29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32 static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Look for a ']' character which denotes the end of the scan list. 54 while (*I != ']') { 55 if (++I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 CS.setEndScanList(I); 62 return false; 63 } 64 65 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66 // We can possibly refactor. 67 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68 const char *&Beg, 69 const char *E, 70 unsigned &argIndex, 71 const LangOptions &LO) { 72 73 using namespace clang::analyze_scanf; 74 const char *I = Beg; 75 const char *Start = 0; 76 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 77 78 // Look for a '%' character that indicates the start of a format specifier. 79 for ( ; I != E ; ++I) { 80 char c = *I; 81 if (c == '\0') { 82 // Detect spurious null characters, which are likely errors. 83 H.HandleNullChar(I); 84 return true; 85 } 86 if (c == '%') { 87 Start = I++; // Record the start of the format specifier. 88 break; 89 } 90 } 91 92 // No format specifier found? 93 if (!Start) 94 return false; 95 96 if (I == E) { 97 // No more characters left? 98 H.HandleIncompleteSpecifier(Start, E - Start); 99 return true; 100 } 101 102 ScanfSpecifier FS; 103 if (ParseArgPosition(H, FS, Start, I, E)) 104 return true; 105 106 if (I == E) { 107 // No more characters left? 108 H.HandleIncompleteSpecifier(Start, E - Start); 109 return true; 110 } 111 112 // Look for '*' flag if it is present. 113 if (*I == '*') { 114 FS.setSuppressAssignment(I); 115 if (++I == E) { 116 H.HandleIncompleteSpecifier(Start, E - Start); 117 return true; 118 } 119 } 120 121 // Look for the field width (if any). Unlike printf, this is either 122 // a fixed integer or isn't present. 123 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 124 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 125 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 126 FS.setFieldWidth(Amt); 127 128 if (I == E) { 129 // No more characters left? 130 H.HandleIncompleteSpecifier(Start, E - Start); 131 return true; 132 } 133 } 134 135 // Look for the length modifier. 136 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 137 // No more characters left? 138 H.HandleIncompleteSpecifier(Start, E - Start); 139 return true; 140 } 141 142 // Detect spurious null characters, which are likely errors. 143 if (*I == '\0') { 144 H.HandleNullChar(I); 145 return true; 146 } 147 148 // Finally, look for the conversion specifier. 149 const char *conversionPosition = I++; 150 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 151 switch (*conversionPosition) { 152 default: 153 break; 154 case '%': k = ConversionSpecifier::PercentArg; break; 155 case 'A': k = ConversionSpecifier::AArg; break; 156 case 'E': k = ConversionSpecifier::EArg; break; 157 case 'F': k = ConversionSpecifier::FArg; break; 158 case 'G': k = ConversionSpecifier::GArg; break; 159 case 'X': k = ConversionSpecifier::XArg; break; 160 case 'a': k = ConversionSpecifier::aArg; break; 161 case 'd': k = ConversionSpecifier::dArg; break; 162 case 'e': k = ConversionSpecifier::eArg; break; 163 case 'f': k = ConversionSpecifier::fArg; break; 164 case 'g': k = ConversionSpecifier::gArg; break; 165 case 'i': k = ConversionSpecifier::iArg; break; 166 case 'n': k = ConversionSpecifier::nArg; break; 167 case 'c': k = ConversionSpecifier::cArg; break; 168 case 'C': k = ConversionSpecifier::CArg; break; 169 case 'S': k = ConversionSpecifier::SArg; break; 170 case '[': k = ConversionSpecifier::ScanListArg; break; 171 case 'u': k = ConversionSpecifier::uArg; break; 172 case 'x': k = ConversionSpecifier::xArg; break; 173 case 'o': k = ConversionSpecifier::oArg; break; 174 case 's': k = ConversionSpecifier::sArg; break; 175 case 'p': k = ConversionSpecifier::pArg; break; 176 } 177 ScanfConversionSpecifier CS(conversionPosition, k); 178 if (k == ScanfConversionSpecifier::ScanListArg) { 179 if (ParseScanList(H, CS, I, E)) 180 return true; 181 } 182 FS.setConversionSpecifier(CS); 183 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 184 && !FS.usesPositionalArg()) 185 FS.setArgIndex(argIndex++); 186 187 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 188 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 189 190 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 191 // Assume the conversion takes one argument. 192 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 193 } 194 return ScanfSpecifierResult(Start, FS); 195 } 196 197 ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { 198 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 199 200 if (!CS.consumesDataArgument()) 201 return ScanfArgTypeResult::Invalid(); 202 203 switch(CS.getKind()) { 204 // Signed int. 205 case ConversionSpecifier::dArg: 206 case ConversionSpecifier::iArg: 207 switch (LM.getKind()) { 208 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy); 209 case LengthModifier::AsChar: 210 return ArgTypeResult(ArgTypeResult::AnyCharTy); 211 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy); 212 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy); 213 case LengthModifier::AsLongLong: 214 case LengthModifier::AsQuad: 215 return ArgTypeResult(Ctx.LongLongTy); 216 case LengthModifier::AsIntMax: 217 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *"); 218 case LengthModifier::AsSizeT: 219 // FIXME: ssize_t. 220 return ScanfArgTypeResult(); 221 case LengthModifier::AsPtrDiff: 222 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *"); 223 case LengthModifier::AsLongDouble: 224 // GNU extension. 225 return ArgTypeResult(Ctx.LongLongTy); 226 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 227 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); 228 } 229 230 // Unsigned int. 231 case ConversionSpecifier::oArg: 232 case ConversionSpecifier::uArg: 233 case ConversionSpecifier::xArg: 234 case ConversionSpecifier::XArg: 235 switch (LM.getKind()) { 236 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy); 237 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy); 238 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy); 239 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy); 240 case LengthModifier::AsLongLong: 241 case LengthModifier::AsQuad: 242 return ArgTypeResult(Ctx.UnsignedLongLongTy); 243 case LengthModifier::AsIntMax: 244 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *"); 245 case LengthModifier::AsSizeT: 246 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *"); 247 case LengthModifier::AsPtrDiff: 248 // FIXME: Unsigned version of ptrdiff_t? 249 return ScanfArgTypeResult(); 250 case LengthModifier::AsLongDouble: 251 // GNU extension. 252 return ArgTypeResult(Ctx.UnsignedLongLongTy); 253 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 254 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); 255 } 256 257 // Float. 258 case ConversionSpecifier::aArg: 259 case ConversionSpecifier::AArg: 260 case ConversionSpecifier::eArg: 261 case ConversionSpecifier::EArg: 262 case ConversionSpecifier::fArg: 263 case ConversionSpecifier::FArg: 264 case ConversionSpecifier::gArg: 265 case ConversionSpecifier::GArg: 266 switch (LM.getKind()) { 267 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy); 268 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy); 269 case LengthModifier::AsLongDouble: 270 return ArgTypeResult(Ctx.LongDoubleTy); 271 default: 272 return ScanfArgTypeResult::Invalid(); 273 } 274 275 // Char, string and scanlist. 276 case ConversionSpecifier::cArg: 277 case ConversionSpecifier::sArg: 278 case ConversionSpecifier::ScanListArg: 279 switch (LM.getKind()) { 280 case LengthModifier::None: return ScanfArgTypeResult::CStrTy; 281 case LengthModifier::AsLong: 282 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 283 case LengthModifier::AsAllocate: 284 case LengthModifier::AsMAllocate: 285 return ScanfArgTypeResult(ArgTypeResult::CStrTy); 286 default: 287 return ScanfArgTypeResult::Invalid(); 288 } 289 case ConversionSpecifier::CArg: 290 case ConversionSpecifier::SArg: 291 // FIXME: Mac OS X specific? 292 switch (LM.getKind()) { 293 case LengthModifier::None: 294 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 295 case LengthModifier::AsAllocate: 296 case LengthModifier::AsMAllocate: 297 return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **"); 298 default: 299 return ScanfArgTypeResult::Invalid(); 300 } 301 302 // Pointer. 303 case ConversionSpecifier::pArg: 304 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy)); 305 306 default: 307 break; 308 } 309 310 return ScanfArgTypeResult(); 311 } 312 313 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 314 ASTContext &Ctx) { 315 if (!QT->isPointerType()) 316 return false; 317 318 QualType PT = QT->getPointeeType(); 319 const BuiltinType *BT = PT->getAs<BuiltinType>(); 320 if (!BT) 321 return false; 322 323 // Pointer to a character. 324 if (PT->isAnyCharacterType()) { 325 CS.setKind(ConversionSpecifier::sArg); 326 if (PT->isWideCharType()) 327 LM.setKind(LengthModifier::AsWideChar); 328 else 329 LM.setKind(LengthModifier::None); 330 return true; 331 } 332 333 // Figure out the length modifier. 334 switch (BT->getKind()) { 335 // no modifier 336 case BuiltinType::UInt: 337 case BuiltinType::Int: 338 case BuiltinType::Float: 339 LM.setKind(LengthModifier::None); 340 break; 341 342 // hh 343 case BuiltinType::Char_U: 344 case BuiltinType::UChar: 345 case BuiltinType::Char_S: 346 case BuiltinType::SChar: 347 LM.setKind(LengthModifier::AsChar); 348 break; 349 350 // h 351 case BuiltinType::Short: 352 case BuiltinType::UShort: 353 LM.setKind(LengthModifier::AsShort); 354 break; 355 356 // l 357 case BuiltinType::Long: 358 case BuiltinType::ULong: 359 case BuiltinType::Double: 360 LM.setKind(LengthModifier::AsLong); 361 break; 362 363 // ll 364 case BuiltinType::LongLong: 365 case BuiltinType::ULongLong: 366 LM.setKind(LengthModifier::AsLongLong); 367 break; 368 369 // L 370 case BuiltinType::LongDouble: 371 LM.setKind(LengthModifier::AsLongDouble); 372 break; 373 374 // Don't know. 375 default: 376 return false; 377 } 378 379 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 380 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { 381 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); 382 if (Identifier->getName() == "size_t") { 383 LM.setKind(LengthModifier::AsSizeT); 384 } else if (Identifier->getName() == "ssize_t") { 385 // Not C99, but common in Unix. 386 LM.setKind(LengthModifier::AsSizeT); 387 } else if (Identifier->getName() == "intmax_t") { 388 LM.setKind(LengthModifier::AsIntMax); 389 } else if (Identifier->getName() == "uintmax_t") { 390 LM.setKind(LengthModifier::AsIntMax); 391 } else if (Identifier->getName() == "ptrdiff_t") { 392 LM.setKind(LengthModifier::AsPtrDiff); 393 } 394 } 395 396 // If fixing the length modifier was enough, we are done. 397 const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx); 398 if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT)) 399 return true; 400 401 // Figure out the conversion specifier. 402 if (PT->isRealFloatingType()) 403 CS.setKind(ConversionSpecifier::fArg); 404 else if (PT->isSignedIntegerType()) 405 CS.setKind(ConversionSpecifier::dArg); 406 else if (PT->isUnsignedIntegerType()) 407 CS.setKind(ConversionSpecifier::uArg); 408 else 409 llvm_unreachable("Unexpected type"); 410 411 return true; 412 } 413 414 void ScanfSpecifier::toString(raw_ostream &os) const { 415 os << "%"; 416 417 if (usesPositionalArg()) 418 os << getPositionalArgIndex() << "$"; 419 if (SuppressAssignment) 420 os << "*"; 421 422 FieldWidth.toString(os); 423 os << LM.toString(); 424 os << CS.toString(); 425 } 426 427 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 428 const char *I, 429 const char *E, 430 const LangOptions &LO) { 431 432 unsigned argIndex = 0; 433 434 // Keep looking for a format specifier until we have exhausted the string. 435 while (I != E) { 436 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 437 LO); 438 // Did a fail-stop error of any kind occur when parsing the specifier? 439 // If so, don't do any more processing. 440 if (FSR.shouldStop()) 441 return true;; 442 // Did we exhaust the string or encounter an error that 443 // we can recover from? 444 if (!FSR.hasValue()) 445 continue; 446 // We have a format specifier. Pass it to the callback. 447 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 448 I - FSR.getStart())) { 449 return true; 450 } 451 } 452 assert(I == E && "Format string not exhausted"); 453 return false; 454 } 455 456 bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const { 457 switch (K) { 458 case InvalidTy: 459 llvm_unreachable("ArgTypeResult must be valid"); 460 case UnknownTy: 461 return true; 462 case CStrTy: 463 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy); 464 case WCStrTy: 465 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy); 466 case PtrToArgTypeResultTy: { 467 const PointerType *PT = argTy->getAs<PointerType>(); 468 if (!PT) 469 return false; 470 return A.matchesType(C, PT->getPointeeType()); 471 } 472 } 473 474 llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); 475 } 476 477 QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const { 478 switch (K) { 479 case InvalidTy: 480 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 481 case UnknownTy: 482 return QualType(); 483 case CStrTy: 484 return C.getPointerType(C.CharTy); 485 case WCStrTy: 486 return C.getPointerType(C.getWCharType()); 487 case PtrToArgTypeResultTy: 488 return C.getPointerType(A.getRepresentativeType(C)); 489 } 490 491 llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); 492 } 493 494 std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const { 495 std::string S = getRepresentativeType(C).getAsString(); 496 if (!Name) 497 return std::string("'") + S + "'"; 498 return std::string("'") + Name + "' (aka '" + S + "')"; 499 } 500