Home | History | Annotate | Download | only in Analysis
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "clang/Analysis/Analyses/FormatString.h"
     16 #include "FormatStringParsing.h"
     17 
     18 using clang::analyze_format_string::ArgTypeResult;
     19 using clang::analyze_format_string::FormatStringHandler;
     20 using clang::analyze_format_string::LengthModifier;
     21 using clang::analyze_format_string::OptionalAmount;
     22 using clang::analyze_format_string::ConversionSpecifier;
     23 using clang::analyze_scanf::ScanfArgTypeResult;
     24 using clang::analyze_scanf::ScanfConversionSpecifier;
     25 using clang::analyze_scanf::ScanfSpecifier;
     26 using clang::UpdateOnReturn;
     27 using namespace clang;
     28 
     29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
     30         ScanfSpecifierResult;
     31 
     32 static bool ParseScanList(FormatStringHandler &H,
     33                           ScanfConversionSpecifier &CS,
     34                           const char *&Beg, const char *E) {
     35   const char *I = Beg;
     36   const char *start = I - 1;
     37   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     38 
     39   // No more characters?
     40   if (I == E) {
     41     H.HandleIncompleteScanList(start, I);
     42     return true;
     43   }
     44 
     45   // Special case: ']' is the first character.
     46   if (*I == ']') {
     47     if (++I == E) {
     48       H.HandleIncompleteScanList(start, I - 1);
     49       return true;
     50     }
     51   }
     52 
     53   // Look for a ']' character which denotes the end of the scan list.
     54   while (*I != ']') {
     55     if (++I == E) {
     56       H.HandleIncompleteScanList(start, I - 1);
     57       return true;
     58     }
     59   }
     60 
     61   CS.setEndScanList(I);
     62   return false;
     63 }
     64 
     65 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
     66 // We can possibly refactor.
     67 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
     68                                                 const char *&Beg,
     69                                                 const char *E,
     70                                                 unsigned &argIndex,
     71                                                 const LangOptions &LO) {
     72 
     73   using namespace clang::analyze_scanf;
     74   const char *I = Beg;
     75   const char *Start = 0;
     76   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     77 
     78     // Look for a '%' character that indicates the start of a format specifier.
     79   for ( ; I != E ; ++I) {
     80     char c = *I;
     81     if (c == '\0') {
     82         // Detect spurious null characters, which are likely errors.
     83       H.HandleNullChar(I);
     84       return true;
     85     }
     86     if (c == '%') {
     87       Start = I++;  // Record the start of the format specifier.
     88       break;
     89     }
     90   }
     91 
     92     // No format specifier found?
     93   if (!Start)
     94     return false;
     95 
     96   if (I == E) {
     97       // No more characters left?
     98     H.HandleIncompleteSpecifier(Start, E - Start);
     99     return true;
    100   }
    101 
    102   ScanfSpecifier FS;
    103   if (ParseArgPosition(H, FS, Start, I, E))
    104     return true;
    105 
    106   if (I == E) {
    107       // No more characters left?
    108     H.HandleIncompleteSpecifier(Start, E - Start);
    109     return true;
    110   }
    111 
    112   // Look for '*' flag if it is present.
    113   if (*I == '*') {
    114     FS.setSuppressAssignment(I);
    115     if (++I == E) {
    116       H.HandleIncompleteSpecifier(Start, E - Start);
    117       return true;
    118     }
    119   }
    120 
    121   // Look for the field width (if any).  Unlike printf, this is either
    122   // a fixed integer or isn't present.
    123   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
    124   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
    125     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
    126     FS.setFieldWidth(Amt);
    127 
    128     if (I == E) {
    129       // No more characters left?
    130       H.HandleIncompleteSpecifier(Start, E - Start);
    131       return true;
    132     }
    133   }
    134 
    135   // Look for the length modifier.
    136   if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
    137       // No more characters left?
    138     H.HandleIncompleteSpecifier(Start, E - Start);
    139     return true;
    140   }
    141 
    142   // Detect spurious null characters, which are likely errors.
    143   if (*I == '\0') {
    144     H.HandleNullChar(I);
    145     return true;
    146   }
    147 
    148   // Finally, look for the conversion specifier.
    149   const char *conversionPosition = I++;
    150   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
    151   switch (*conversionPosition) {
    152     default:
    153       break;
    154     case '%': k = ConversionSpecifier::PercentArg;   break;
    155     case 'A': k = ConversionSpecifier::AArg; break;
    156     case 'E': k = ConversionSpecifier::EArg; break;
    157     case 'F': k = ConversionSpecifier::FArg; break;
    158     case 'G': k = ConversionSpecifier::GArg; break;
    159     case 'X': k = ConversionSpecifier::XArg; break;
    160     case 'a': k = ConversionSpecifier::aArg; break;
    161     case 'd': k = ConversionSpecifier::dArg; break;
    162     case 'e': k = ConversionSpecifier::eArg; break;
    163     case 'f': k = ConversionSpecifier::fArg; break;
    164     case 'g': k = ConversionSpecifier::gArg; break;
    165     case 'i': k = ConversionSpecifier::iArg; break;
    166     case 'n': k = ConversionSpecifier::nArg; break;
    167     case 'c': k = ConversionSpecifier::cArg; break;
    168     case 'C': k = ConversionSpecifier::CArg; break;
    169     case 'S': k = ConversionSpecifier::SArg; break;
    170     case '[': k = ConversionSpecifier::ScanListArg; break;
    171     case 'u': k = ConversionSpecifier::uArg; break;
    172     case 'x': k = ConversionSpecifier::xArg; break;
    173     case 'o': k = ConversionSpecifier::oArg; break;
    174     case 's': k = ConversionSpecifier::sArg; break;
    175     case 'p': k = ConversionSpecifier::pArg; break;
    176   }
    177   ScanfConversionSpecifier CS(conversionPosition, k);
    178   if (k == ScanfConversionSpecifier::ScanListArg) {
    179     if (ParseScanList(H, CS, I, E))
    180       return true;
    181   }
    182   FS.setConversionSpecifier(CS);
    183   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
    184       && !FS.usesPositionalArg())
    185     FS.setArgIndex(argIndex++);
    186 
    187   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
    188   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
    189 
    190   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
    191     // Assume the conversion takes one argument.
    192     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
    193   }
    194   return ScanfSpecifierResult(Start, FS);
    195 }
    196 
    197 ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
    198   const ScanfConversionSpecifier &CS = getConversionSpecifier();
    199 
    200   if (!CS.consumesDataArgument())
    201     return ScanfArgTypeResult::Invalid();
    202 
    203   switch(CS.getKind()) {
    204     // Signed int.
    205     case ConversionSpecifier::dArg:
    206     case ConversionSpecifier::iArg:
    207       switch (LM.getKind()) {
    208         case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
    209         case LengthModifier::AsChar:
    210           return ArgTypeResult(ArgTypeResult::AnyCharTy);
    211         case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
    212         case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
    213         case LengthModifier::AsLongLong:
    214         case LengthModifier::AsQuad:
    215           return ArgTypeResult(Ctx.LongLongTy);
    216         case LengthModifier::AsIntMax:
    217           return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
    218         case LengthModifier::AsSizeT:
    219           // FIXME: ssize_t.
    220           return ScanfArgTypeResult();
    221         case LengthModifier::AsPtrDiff:
    222           return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
    223         case LengthModifier::AsLongDouble:
    224           // GNU extension.
    225           return ArgTypeResult(Ctx.LongLongTy);
    226         case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
    227         case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
    228       }
    229 
    230     // Unsigned int.
    231     case ConversionSpecifier::oArg:
    232     case ConversionSpecifier::uArg:
    233     case ConversionSpecifier::xArg:
    234     case ConversionSpecifier::XArg:
    235       switch (LM.getKind()) {
    236         case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
    237         case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
    238         case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
    239         case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
    240         case LengthModifier::AsLongLong:
    241         case LengthModifier::AsQuad:
    242           return ArgTypeResult(Ctx.UnsignedLongLongTy);
    243         case LengthModifier::AsIntMax:
    244           return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
    245         case LengthModifier::AsSizeT:
    246           return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
    247         case LengthModifier::AsPtrDiff:
    248           // FIXME: Unsigned version of ptrdiff_t?
    249           return ScanfArgTypeResult();
    250         case LengthModifier::AsLongDouble:
    251           // GNU extension.
    252           return ArgTypeResult(Ctx.UnsignedLongLongTy);
    253         case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
    254         case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
    255       }
    256 
    257     // Float.
    258     case ConversionSpecifier::aArg:
    259     case ConversionSpecifier::AArg:
    260     case ConversionSpecifier::eArg:
    261     case ConversionSpecifier::EArg:
    262     case ConversionSpecifier::fArg:
    263     case ConversionSpecifier::FArg:
    264     case ConversionSpecifier::gArg:
    265     case ConversionSpecifier::GArg:
    266       switch (LM.getKind()) {
    267         case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
    268         case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
    269         case LengthModifier::AsLongDouble:
    270           return ArgTypeResult(Ctx.LongDoubleTy);
    271         default:
    272           return ScanfArgTypeResult::Invalid();
    273       }
    274 
    275     // Char, string and scanlist.
    276     case ConversionSpecifier::cArg:
    277     case ConversionSpecifier::sArg:
    278     case ConversionSpecifier::ScanListArg:
    279       switch (LM.getKind()) {
    280         case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
    281         case LengthModifier::AsLong:
    282           return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
    283         case LengthModifier::AsAllocate:
    284         case LengthModifier::AsMAllocate:
    285           return ScanfArgTypeResult(ArgTypeResult::CStrTy);
    286         default:
    287           return ScanfArgTypeResult::Invalid();
    288       }
    289     case ConversionSpecifier::CArg:
    290     case ConversionSpecifier::SArg:
    291       // FIXME: Mac OS X specific?
    292       switch (LM.getKind()) {
    293         case LengthModifier::None:
    294           return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
    295         case LengthModifier::AsAllocate:
    296         case LengthModifier::AsMAllocate:
    297           return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
    298         default:
    299           return ScanfArgTypeResult::Invalid();
    300       }
    301 
    302     // Pointer.
    303     case ConversionSpecifier::pArg:
    304       return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
    305 
    306     default:
    307       break;
    308   }
    309 
    310   return ScanfArgTypeResult();
    311 }
    312 
    313 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
    314                              ASTContext &Ctx) {
    315   if (!QT->isPointerType())
    316     return false;
    317 
    318   QualType PT = QT->getPointeeType();
    319   const BuiltinType *BT = PT->getAs<BuiltinType>();
    320   if (!BT)
    321     return false;
    322 
    323   // Pointer to a character.
    324   if (PT->isAnyCharacterType()) {
    325     CS.setKind(ConversionSpecifier::sArg);
    326     if (PT->isWideCharType())
    327       LM.setKind(LengthModifier::AsWideChar);
    328     else
    329       LM.setKind(LengthModifier::None);
    330     return true;
    331   }
    332 
    333   // Figure out the length modifier.
    334   switch (BT->getKind()) {
    335     // no modifier
    336     case BuiltinType::UInt:
    337     case BuiltinType::Int:
    338     case BuiltinType::Float:
    339       LM.setKind(LengthModifier::None);
    340       break;
    341 
    342     // hh
    343     case BuiltinType::Char_U:
    344     case BuiltinType::UChar:
    345     case BuiltinType::Char_S:
    346     case BuiltinType::SChar:
    347       LM.setKind(LengthModifier::AsChar);
    348       break;
    349 
    350     // h
    351     case BuiltinType::Short:
    352     case BuiltinType::UShort:
    353       LM.setKind(LengthModifier::AsShort);
    354       break;
    355 
    356     // l
    357     case BuiltinType::Long:
    358     case BuiltinType::ULong:
    359     case BuiltinType::Double:
    360       LM.setKind(LengthModifier::AsLong);
    361       break;
    362 
    363     // ll
    364     case BuiltinType::LongLong:
    365     case BuiltinType::ULongLong:
    366       LM.setKind(LengthModifier::AsLongLong);
    367       break;
    368 
    369     // L
    370     case BuiltinType::LongDouble:
    371       LM.setKind(LengthModifier::AsLongDouble);
    372       break;
    373 
    374     // Don't know.
    375     default:
    376       return false;
    377   }
    378 
    379   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
    380   if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
    381     const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
    382     if (Identifier->getName() == "size_t") {
    383       LM.setKind(LengthModifier::AsSizeT);
    384     } else if (Identifier->getName() == "ssize_t") {
    385       // Not C99, but common in Unix.
    386       LM.setKind(LengthModifier::AsSizeT);
    387     } else if (Identifier->getName() == "intmax_t") {
    388       LM.setKind(LengthModifier::AsIntMax);
    389     } else if (Identifier->getName() == "uintmax_t") {
    390       LM.setKind(LengthModifier::AsIntMax);
    391     } else if (Identifier->getName() == "ptrdiff_t") {
    392       LM.setKind(LengthModifier::AsPtrDiff);
    393     }
    394   }
    395 
    396   // If fixing the length modifier was enough, we are done.
    397   const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
    398   if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
    399     return true;
    400 
    401   // Figure out the conversion specifier.
    402   if (PT->isRealFloatingType())
    403     CS.setKind(ConversionSpecifier::fArg);
    404   else if (PT->isSignedIntegerType())
    405     CS.setKind(ConversionSpecifier::dArg);
    406   else if (PT->isUnsignedIntegerType())
    407     CS.setKind(ConversionSpecifier::uArg);
    408   else
    409     llvm_unreachable("Unexpected type");
    410 
    411   return true;
    412 }
    413 
    414 void ScanfSpecifier::toString(raw_ostream &os) const {
    415   os << "%";
    416 
    417   if (usesPositionalArg())
    418     os << getPositionalArgIndex() << "$";
    419   if (SuppressAssignment)
    420     os << "*";
    421 
    422   FieldWidth.toString(os);
    423   os << LM.toString();
    424   os << CS.toString();
    425 }
    426 
    427 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
    428                                                     const char *I,
    429                                                     const char *E,
    430                                                     const LangOptions &LO) {
    431 
    432   unsigned argIndex = 0;
    433 
    434   // Keep looking for a format specifier until we have exhausted the string.
    435   while (I != E) {
    436     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
    437                                                           LO);
    438     // Did a fail-stop error of any kind occur when parsing the specifier?
    439     // If so, don't do any more processing.
    440     if (FSR.shouldStop())
    441       return true;;
    442       // Did we exhaust the string or encounter an error that
    443       // we can recover from?
    444     if (!FSR.hasValue())
    445       continue;
    446       // We have a format specifier.  Pass it to the callback.
    447     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
    448                                 I - FSR.getStart())) {
    449       return true;
    450     }
    451   }
    452   assert(I == E && "Format string not exhausted");
    453   return false;
    454 }
    455 
    456 bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
    457   switch (K) {
    458     case InvalidTy:
    459       llvm_unreachable("ArgTypeResult must be valid");
    460     case UnknownTy:
    461       return true;
    462     case CStrTy:
    463       return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
    464     case WCStrTy:
    465       return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
    466     case PtrToArgTypeResultTy: {
    467       const PointerType *PT = argTy->getAs<PointerType>();
    468       if (!PT)
    469         return false;
    470       return A.matchesType(C, PT->getPointeeType());
    471     }
    472   }
    473 
    474   llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
    475 }
    476 
    477 QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
    478   switch (K) {
    479     case InvalidTy:
    480       llvm_unreachable("No representative type for Invalid ArgTypeResult");
    481     case UnknownTy:
    482       return QualType();
    483     case CStrTy:
    484       return C.getPointerType(C.CharTy);
    485     case WCStrTy:
    486       return C.getPointerType(C.getWCharType());
    487     case PtrToArgTypeResultTy:
    488       return C.getPointerType(A.getRepresentativeType(C));
    489   }
    490 
    491   llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
    492 }
    493 
    494 std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
    495   std::string S = getRepresentativeType(C).getAsString();
    496   if (!Name)
    497     return std::string("'") + S + "'";
    498   return std::string("'") + Name + "' (aka '" + S + "')";
    499 }
    500