1 //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Handling of format string in scanf and friends. The structure of format 11 // strings for fscanf() are described in C99 7.19.6.2. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/Analyses/FormatString.h" 16 #include "FormatStringParsing.h" 17 #include "clang/Basic/TargetInfo.h" 18 19 using clang::analyze_format_string::ArgType; 20 using clang::analyze_format_string::FormatStringHandler; 21 using clang::analyze_format_string::LengthModifier; 22 using clang::analyze_format_string::OptionalAmount; 23 using clang::analyze_format_string::ConversionSpecifier; 24 using clang::analyze_scanf::ScanfConversionSpecifier; 25 using clang::analyze_scanf::ScanfSpecifier; 26 using clang::UpdateOnReturn; 27 using namespace clang; 28 29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32 static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Look for a ']' character which denotes the end of the scan list. 54 while (*I != ']') { 55 if (++I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 CS.setEndScanList(I); 62 return false; 63 } 64 65 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66 // We can possibly refactor. 67 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68 const char *&Beg, 69 const char *E, 70 unsigned &argIndex, 71 const LangOptions &LO, 72 const TargetInfo &Target) { 73 74 using namespace clang::analyze_scanf; 75 const char *I = Beg; 76 const char *Start = 0; 77 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 78 79 // Look for a '%' character that indicates the start of a format specifier. 80 for ( ; I != E ; ++I) { 81 char c = *I; 82 if (c == '\0') { 83 // Detect spurious null characters, which are likely errors. 84 H.HandleNullChar(I); 85 return true; 86 } 87 if (c == '%') { 88 Start = I++; // Record the start of the format specifier. 89 break; 90 } 91 } 92 93 // No format specifier found? 94 if (!Start) 95 return false; 96 97 if (I == E) { 98 // No more characters left? 99 H.HandleIncompleteSpecifier(Start, E - Start); 100 return true; 101 } 102 103 ScanfSpecifier FS; 104 if (ParseArgPosition(H, FS, Start, I, E)) 105 return true; 106 107 if (I == E) { 108 // No more characters left? 109 H.HandleIncompleteSpecifier(Start, E - Start); 110 return true; 111 } 112 113 // Look for '*' flag if it is present. 114 if (*I == '*') { 115 FS.setSuppressAssignment(I); 116 if (++I == E) { 117 H.HandleIncompleteSpecifier(Start, E - Start); 118 return true; 119 } 120 } 121 122 // Look for the field width (if any). Unlike printf, this is either 123 // a fixed integer or isn't present. 124 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 125 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 126 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 127 FS.setFieldWidth(Amt); 128 129 if (I == E) { 130 // No more characters left? 131 H.HandleIncompleteSpecifier(Start, E - Start); 132 return true; 133 } 134 } 135 136 // Look for the length modifier. 137 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 138 // No more characters left? 139 H.HandleIncompleteSpecifier(Start, E - Start); 140 return true; 141 } 142 143 // Detect spurious null characters, which are likely errors. 144 if (*I == '\0') { 145 H.HandleNullChar(I); 146 return true; 147 } 148 149 // Finally, look for the conversion specifier. 150 const char *conversionPosition = I++; 151 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 152 switch (*conversionPosition) { 153 default: 154 break; 155 case '%': k = ConversionSpecifier::PercentArg; break; 156 case 'A': k = ConversionSpecifier::AArg; break; 157 case 'E': k = ConversionSpecifier::EArg; break; 158 case 'F': k = ConversionSpecifier::FArg; break; 159 case 'G': k = ConversionSpecifier::GArg; break; 160 case 'X': k = ConversionSpecifier::XArg; break; 161 case 'a': k = ConversionSpecifier::aArg; break; 162 case 'd': k = ConversionSpecifier::dArg; break; 163 case 'e': k = ConversionSpecifier::eArg; break; 164 case 'f': k = ConversionSpecifier::fArg; break; 165 case 'g': k = ConversionSpecifier::gArg; break; 166 case 'i': k = ConversionSpecifier::iArg; break; 167 case 'n': k = ConversionSpecifier::nArg; break; 168 case 'c': k = ConversionSpecifier::cArg; break; 169 case 'C': k = ConversionSpecifier::CArg; break; 170 case 'S': k = ConversionSpecifier::SArg; break; 171 case '[': k = ConversionSpecifier::ScanListArg; break; 172 case 'u': k = ConversionSpecifier::uArg; break; 173 case 'x': k = ConversionSpecifier::xArg; break; 174 case 'o': k = ConversionSpecifier::oArg; break; 175 case 's': k = ConversionSpecifier::sArg; break; 176 case 'p': k = ConversionSpecifier::pArg; break; 177 // Apple extensions 178 // Apple-specific 179 case 'D': 180 if (Target.getTriple().isOSDarwin()) 181 k = ConversionSpecifier::DArg; 182 break; 183 case 'O': 184 if (Target.getTriple().isOSDarwin()) 185 k = ConversionSpecifier::OArg; 186 break; 187 case 'U': 188 if (Target.getTriple().isOSDarwin()) 189 k = ConversionSpecifier::UArg; 190 break; 191 } 192 ScanfConversionSpecifier CS(conversionPosition, k); 193 if (k == ScanfConversionSpecifier::ScanListArg) { 194 if (ParseScanList(H, CS, I, E)) 195 return true; 196 } 197 FS.setConversionSpecifier(CS); 198 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 199 && !FS.usesPositionalArg()) 200 FS.setArgIndex(argIndex++); 201 202 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 203 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 204 205 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 206 // Assume the conversion takes one argument. 207 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 208 } 209 return ScanfSpecifierResult(Start, FS); 210 } 211 212 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 213 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 214 215 if (!CS.consumesDataArgument()) 216 return ArgType::Invalid(); 217 218 switch(CS.getKind()) { 219 // Signed int. 220 case ConversionSpecifier::dArg: 221 case ConversionSpecifier::DArg: 222 case ConversionSpecifier::iArg: 223 switch (LM.getKind()) { 224 case LengthModifier::None: 225 return ArgType::PtrTo(Ctx.IntTy); 226 case LengthModifier::AsChar: 227 return ArgType::PtrTo(ArgType::AnyCharTy); 228 case LengthModifier::AsShort: 229 return ArgType::PtrTo(Ctx.ShortTy); 230 case LengthModifier::AsLong: 231 return ArgType::PtrTo(Ctx.LongTy); 232 case LengthModifier::AsLongLong: 233 case LengthModifier::AsQuad: 234 return ArgType::PtrTo(Ctx.LongLongTy); 235 case LengthModifier::AsIntMax: 236 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 237 case LengthModifier::AsSizeT: 238 // FIXME: ssize_t. 239 return ArgType(); 240 case LengthModifier::AsPtrDiff: 241 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 242 case LengthModifier::AsLongDouble: 243 // GNU extension. 244 return ArgType::PtrTo(Ctx.LongLongTy); 245 case LengthModifier::AsAllocate: 246 return ArgType::Invalid(); 247 case LengthModifier::AsMAllocate: 248 return ArgType::Invalid(); 249 } 250 251 // Unsigned int. 252 case ConversionSpecifier::oArg: 253 case ConversionSpecifier::OArg: 254 case ConversionSpecifier::uArg: 255 case ConversionSpecifier::UArg: 256 case ConversionSpecifier::xArg: 257 case ConversionSpecifier::XArg: 258 switch (LM.getKind()) { 259 case LengthModifier::None: 260 return ArgType::PtrTo(Ctx.UnsignedIntTy); 261 case LengthModifier::AsChar: 262 return ArgType::PtrTo(Ctx.UnsignedCharTy); 263 case LengthModifier::AsShort: 264 return ArgType::PtrTo(Ctx.UnsignedShortTy); 265 case LengthModifier::AsLong: 266 return ArgType::PtrTo(Ctx.UnsignedLongTy); 267 case LengthModifier::AsLongLong: 268 case LengthModifier::AsQuad: 269 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 270 case LengthModifier::AsIntMax: 271 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 272 case LengthModifier::AsSizeT: 273 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 274 case LengthModifier::AsPtrDiff: 275 // FIXME: Unsigned version of ptrdiff_t? 276 return ArgType(); 277 case LengthModifier::AsLongDouble: 278 // GNU extension. 279 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 280 case LengthModifier::AsAllocate: 281 return ArgType::Invalid(); 282 case LengthModifier::AsMAllocate: 283 return ArgType::Invalid(); 284 } 285 286 // Float. 287 case ConversionSpecifier::aArg: 288 case ConversionSpecifier::AArg: 289 case ConversionSpecifier::eArg: 290 case ConversionSpecifier::EArg: 291 case ConversionSpecifier::fArg: 292 case ConversionSpecifier::FArg: 293 case ConversionSpecifier::gArg: 294 case ConversionSpecifier::GArg: 295 switch (LM.getKind()) { 296 case LengthModifier::None: 297 return ArgType::PtrTo(Ctx.FloatTy); 298 case LengthModifier::AsLong: 299 return ArgType::PtrTo(Ctx.DoubleTy); 300 case LengthModifier::AsLongDouble: 301 return ArgType::PtrTo(Ctx.LongDoubleTy); 302 default: 303 return ArgType::Invalid(); 304 } 305 306 // Char, string and scanlist. 307 case ConversionSpecifier::cArg: 308 case ConversionSpecifier::sArg: 309 case ConversionSpecifier::ScanListArg: 310 switch (LM.getKind()) { 311 case LengthModifier::None: 312 return ArgType::PtrTo(ArgType::AnyCharTy); 313 case LengthModifier::AsLong: 314 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 315 case LengthModifier::AsAllocate: 316 case LengthModifier::AsMAllocate: 317 return ArgType::PtrTo(ArgType::CStrTy); 318 default: 319 return ArgType::Invalid(); 320 } 321 case ConversionSpecifier::CArg: 322 case ConversionSpecifier::SArg: 323 // FIXME: Mac OS X specific? 324 switch (LM.getKind()) { 325 case LengthModifier::None: 326 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 327 case LengthModifier::AsAllocate: 328 case LengthModifier::AsMAllocate: 329 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 330 default: 331 return ArgType::Invalid(); 332 } 333 334 // Pointer. 335 case ConversionSpecifier::pArg: 336 return ArgType::PtrTo(ArgType::CPointerTy); 337 338 // Write-back. 339 case ConversionSpecifier::nArg: 340 switch (LM.getKind()) { 341 case LengthModifier::None: 342 return ArgType::PtrTo(Ctx.IntTy); 343 case LengthModifier::AsChar: 344 return ArgType::PtrTo(Ctx.SignedCharTy); 345 case LengthModifier::AsShort: 346 return ArgType::PtrTo(Ctx.ShortTy); 347 case LengthModifier::AsLong: 348 return ArgType::PtrTo(Ctx.LongTy); 349 case LengthModifier::AsLongLong: 350 case LengthModifier::AsQuad: 351 return ArgType::PtrTo(Ctx.LongLongTy); 352 case LengthModifier::AsIntMax: 353 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 354 case LengthModifier::AsSizeT: 355 return ArgType(); // FIXME: ssize_t 356 case LengthModifier::AsPtrDiff: 357 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 358 case LengthModifier::AsLongDouble: 359 return ArgType(); // FIXME: Is this a known extension? 360 case LengthModifier::AsAllocate: 361 case LengthModifier::AsMAllocate: 362 return ArgType::Invalid(); 363 } 364 365 default: 366 break; 367 } 368 369 return ArgType(); 370 } 371 372 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 373 ASTContext &Ctx) { 374 if (!QT->isPointerType()) 375 return false; 376 377 // %n is different from other conversion specifiers; don't try to fix it. 378 if (CS.getKind() == ConversionSpecifier::nArg) 379 return false; 380 381 QualType PT = QT->getPointeeType(); 382 383 // If it's an enum, get its underlying type. 384 if (const EnumType *ETy = QT->getAs<EnumType>()) 385 QT = ETy->getDecl()->getIntegerType(); 386 387 const BuiltinType *BT = PT->getAs<BuiltinType>(); 388 if (!BT) 389 return false; 390 391 // Pointer to a character. 392 if (PT->isAnyCharacterType()) { 393 CS.setKind(ConversionSpecifier::sArg); 394 if (PT->isWideCharType()) 395 LM.setKind(LengthModifier::AsWideChar); 396 else 397 LM.setKind(LengthModifier::None); 398 return true; 399 } 400 401 // Figure out the length modifier. 402 switch (BT->getKind()) { 403 // no modifier 404 case BuiltinType::UInt: 405 case BuiltinType::Int: 406 case BuiltinType::Float: 407 LM.setKind(LengthModifier::None); 408 break; 409 410 // hh 411 case BuiltinType::Char_U: 412 case BuiltinType::UChar: 413 case BuiltinType::Char_S: 414 case BuiltinType::SChar: 415 LM.setKind(LengthModifier::AsChar); 416 break; 417 418 // h 419 case BuiltinType::Short: 420 case BuiltinType::UShort: 421 LM.setKind(LengthModifier::AsShort); 422 break; 423 424 // l 425 case BuiltinType::Long: 426 case BuiltinType::ULong: 427 case BuiltinType::Double: 428 LM.setKind(LengthModifier::AsLong); 429 break; 430 431 // ll 432 case BuiltinType::LongLong: 433 case BuiltinType::ULongLong: 434 LM.setKind(LengthModifier::AsLongLong); 435 break; 436 437 // L 438 case BuiltinType::LongDouble: 439 LM.setKind(LengthModifier::AsLongDouble); 440 break; 441 442 // Don't know. 443 default: 444 return false; 445 } 446 447 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 448 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) 449 namedTypeToLengthModifier(PT, LM); 450 451 // If fixing the length modifier was enough, we are done. 452 if (hasValidLengthModifier(Ctx.getTargetInfo())) { 453 const analyze_scanf::ArgType &AT = getArgType(Ctx); 454 if (AT.isValid() && AT.matchesType(Ctx, QT)) 455 return true; 456 } 457 458 // Figure out the conversion specifier. 459 if (PT->isRealFloatingType()) 460 CS.setKind(ConversionSpecifier::fArg); 461 else if (PT->isSignedIntegerType()) 462 CS.setKind(ConversionSpecifier::dArg); 463 else if (PT->isUnsignedIntegerType()) 464 CS.setKind(ConversionSpecifier::uArg); 465 else 466 llvm_unreachable("Unexpected type"); 467 468 return true; 469 } 470 471 void ScanfSpecifier::toString(raw_ostream &os) const { 472 os << "%"; 473 474 if (usesPositionalArg()) 475 os << getPositionalArgIndex() << "$"; 476 if (SuppressAssignment) 477 os << "*"; 478 479 FieldWidth.toString(os); 480 os << LM.toString(); 481 os << CS.toString(); 482 } 483 484 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 485 const char *I, 486 const char *E, 487 const LangOptions &LO, 488 const TargetInfo &Target) { 489 490 unsigned argIndex = 0; 491 492 // Keep looking for a format specifier until we have exhausted the string. 493 while (I != E) { 494 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 495 LO, Target); 496 // Did a fail-stop error of any kind occur when parsing the specifier? 497 // If so, don't do any more processing. 498 if (FSR.shouldStop()) 499 return true; 500 // Did we exhaust the string or encounter an error that 501 // we can recover from? 502 if (!FSR.hasValue()) 503 continue; 504 // We have a format specifier. Pass it to the callback. 505 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 506 I - FSR.getStart())) { 507 return true; 508 } 509 } 510 assert(I == E && "Format string not exhausted"); 511 return false; 512 } 513