Home | History | Annotate | Download | only in Analysis
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "clang/Analysis/Analyses/FormatString.h"
     16 #include "FormatStringParsing.h"
     17 
     18 using clang::analyze_format_string::ArgType;
     19 using clang::analyze_format_string::FormatStringHandler;
     20 using clang::analyze_format_string::LengthModifier;
     21 using clang::analyze_format_string::OptionalAmount;
     22 using clang::analyze_format_string::ConversionSpecifier;
     23 using clang::analyze_scanf::ScanfConversionSpecifier;
     24 using clang::analyze_scanf::ScanfSpecifier;
     25 using clang::UpdateOnReturn;
     26 using namespace clang;
     27 
     28 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
     29         ScanfSpecifierResult;
     30 
     31 static bool ParseScanList(FormatStringHandler &H,
     32                           ScanfConversionSpecifier &CS,
     33                           const char *&Beg, const char *E) {
     34   const char *I = Beg;
     35   const char *start = I - 1;
     36   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     37 
     38   // No more characters?
     39   if (I == E) {
     40     H.HandleIncompleteScanList(start, I);
     41     return true;
     42   }
     43 
     44   // Special case: ']' is the first character.
     45   if (*I == ']') {
     46     if (++I == E) {
     47       H.HandleIncompleteScanList(start, I - 1);
     48       return true;
     49     }
     50   }
     51 
     52   // Look for a ']' character which denotes the end of the scan list.
     53   while (*I != ']') {
     54     if (++I == E) {
     55       H.HandleIncompleteScanList(start, I - 1);
     56       return true;
     57     }
     58   }
     59 
     60   CS.setEndScanList(I);
     61   return false;
     62 }
     63 
     64 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
     65 // We can possibly refactor.
     66 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
     67                                                 const char *&Beg,
     68                                                 const char *E,
     69                                                 unsigned &argIndex,
     70                                                 const LangOptions &LO) {
     71 
     72   using namespace clang::analyze_scanf;
     73   const char *I = Beg;
     74   const char *Start = 0;
     75   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     76 
     77     // Look for a '%' character that indicates the start of a format specifier.
     78   for ( ; I != E ; ++I) {
     79     char c = *I;
     80     if (c == '\0') {
     81         // Detect spurious null characters, which are likely errors.
     82       H.HandleNullChar(I);
     83       return true;
     84     }
     85     if (c == '%') {
     86       Start = I++;  // Record the start of the format specifier.
     87       break;
     88     }
     89   }
     90 
     91     // No format specifier found?
     92   if (!Start)
     93     return false;
     94 
     95   if (I == E) {
     96       // No more characters left?
     97     H.HandleIncompleteSpecifier(Start, E - Start);
     98     return true;
     99   }
    100 
    101   ScanfSpecifier FS;
    102   if (ParseArgPosition(H, FS, Start, I, E))
    103     return true;
    104 
    105   if (I == E) {
    106       // No more characters left?
    107     H.HandleIncompleteSpecifier(Start, E - Start);
    108     return true;
    109   }
    110 
    111   // Look for '*' flag if it is present.
    112   if (*I == '*') {
    113     FS.setSuppressAssignment(I);
    114     if (++I == E) {
    115       H.HandleIncompleteSpecifier(Start, E - Start);
    116       return true;
    117     }
    118   }
    119 
    120   // Look for the field width (if any).  Unlike printf, this is either
    121   // a fixed integer or isn't present.
    122   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
    123   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
    124     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
    125     FS.setFieldWidth(Amt);
    126 
    127     if (I == E) {
    128       // No more characters left?
    129       H.HandleIncompleteSpecifier(Start, E - Start);
    130       return true;
    131     }
    132   }
    133 
    134   // Look for the length modifier.
    135   if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
    136       // No more characters left?
    137     H.HandleIncompleteSpecifier(Start, E - Start);
    138     return true;
    139   }
    140 
    141   // Detect spurious null characters, which are likely errors.
    142   if (*I == '\0') {
    143     H.HandleNullChar(I);
    144     return true;
    145   }
    146 
    147   // Finally, look for the conversion specifier.
    148   const char *conversionPosition = I++;
    149   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
    150   switch (*conversionPosition) {
    151     default:
    152       break;
    153     case '%': k = ConversionSpecifier::PercentArg;   break;
    154     case 'A': k = ConversionSpecifier::AArg; break;
    155     case 'E': k = ConversionSpecifier::EArg; break;
    156     case 'F': k = ConversionSpecifier::FArg; break;
    157     case 'G': k = ConversionSpecifier::GArg; break;
    158     case 'X': k = ConversionSpecifier::XArg; break;
    159     case 'a': k = ConversionSpecifier::aArg; break;
    160     case 'd': k = ConversionSpecifier::dArg; break;
    161     case 'e': k = ConversionSpecifier::eArg; break;
    162     case 'f': k = ConversionSpecifier::fArg; break;
    163     case 'g': k = ConversionSpecifier::gArg; break;
    164     case 'i': k = ConversionSpecifier::iArg; break;
    165     case 'n': k = ConversionSpecifier::nArg; break;
    166     case 'c': k = ConversionSpecifier::cArg; break;
    167     case 'C': k = ConversionSpecifier::CArg; break;
    168     case 'S': k = ConversionSpecifier::SArg; break;
    169     case '[': k = ConversionSpecifier::ScanListArg; break;
    170     case 'u': k = ConversionSpecifier::uArg; break;
    171     case 'x': k = ConversionSpecifier::xArg; break;
    172     case 'o': k = ConversionSpecifier::oArg; break;
    173     case 's': k = ConversionSpecifier::sArg; break;
    174     case 'p': k = ConversionSpecifier::pArg; break;
    175   }
    176   ScanfConversionSpecifier CS(conversionPosition, k);
    177   if (k == ScanfConversionSpecifier::ScanListArg) {
    178     if (ParseScanList(H, CS, I, E))
    179       return true;
    180   }
    181   FS.setConversionSpecifier(CS);
    182   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
    183       && !FS.usesPositionalArg())
    184     FS.setArgIndex(argIndex++);
    185 
    186   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
    187   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
    188 
    189   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
    190     // Assume the conversion takes one argument.
    191     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
    192   }
    193   return ScanfSpecifierResult(Start, FS);
    194 }
    195 
    196 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
    197   const ScanfConversionSpecifier &CS = getConversionSpecifier();
    198 
    199   if (!CS.consumesDataArgument())
    200     return ArgType::Invalid();
    201 
    202   switch(CS.getKind()) {
    203     // Signed int.
    204     case ConversionSpecifier::dArg:
    205     case ConversionSpecifier::iArg:
    206       switch (LM.getKind()) {
    207         case LengthModifier::None:
    208           return ArgType::PtrTo(Ctx.IntTy);
    209         case LengthModifier::AsChar:
    210           return ArgType::PtrTo(ArgType::AnyCharTy);
    211         case LengthModifier::AsShort:
    212           return ArgType::PtrTo(Ctx.ShortTy);
    213         case LengthModifier::AsLong:
    214           return ArgType::PtrTo(Ctx.LongTy);
    215         case LengthModifier::AsLongLong:
    216         case LengthModifier::AsQuad:
    217           return ArgType::PtrTo(Ctx.LongLongTy);
    218         case LengthModifier::AsIntMax:
    219           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
    220         case LengthModifier::AsSizeT:
    221           // FIXME: ssize_t.
    222           return ArgType();
    223         case LengthModifier::AsPtrDiff:
    224           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
    225         case LengthModifier::AsLongDouble:
    226           // GNU extension.
    227           return ArgType::PtrTo(Ctx.LongLongTy);
    228         case LengthModifier::AsAllocate:
    229           return ArgType::Invalid();
    230         case LengthModifier::AsMAllocate:
    231           return ArgType::Invalid();
    232       }
    233 
    234     // Unsigned int.
    235     case ConversionSpecifier::oArg:
    236     case ConversionSpecifier::uArg:
    237     case ConversionSpecifier::xArg:
    238     case ConversionSpecifier::XArg:
    239       switch (LM.getKind()) {
    240         case LengthModifier::None:
    241           return ArgType::PtrTo(Ctx.UnsignedIntTy);
    242         case LengthModifier::AsChar:
    243           return ArgType::PtrTo(Ctx.UnsignedCharTy);
    244         case LengthModifier::AsShort:
    245           return ArgType::PtrTo(Ctx.UnsignedShortTy);
    246         case LengthModifier::AsLong:
    247           return ArgType::PtrTo(Ctx.UnsignedLongTy);
    248         case LengthModifier::AsLongLong:
    249         case LengthModifier::AsQuad:
    250           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
    251         case LengthModifier::AsIntMax:
    252           return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
    253         case LengthModifier::AsSizeT:
    254           return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
    255         case LengthModifier::AsPtrDiff:
    256           // FIXME: Unsigned version of ptrdiff_t?
    257           return ArgType();
    258         case LengthModifier::AsLongDouble:
    259           // GNU extension.
    260           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
    261         case LengthModifier::AsAllocate:
    262           return ArgType::Invalid();
    263         case LengthModifier::AsMAllocate:
    264           return ArgType::Invalid();
    265       }
    266 
    267     // Float.
    268     case ConversionSpecifier::aArg:
    269     case ConversionSpecifier::AArg:
    270     case ConversionSpecifier::eArg:
    271     case ConversionSpecifier::EArg:
    272     case ConversionSpecifier::fArg:
    273     case ConversionSpecifier::FArg:
    274     case ConversionSpecifier::gArg:
    275     case ConversionSpecifier::GArg:
    276       switch (LM.getKind()) {
    277         case LengthModifier::None:
    278           return ArgType::PtrTo(Ctx.FloatTy);
    279         case LengthModifier::AsLong:
    280           return ArgType::PtrTo(Ctx.DoubleTy);
    281         case LengthModifier::AsLongDouble:
    282           return ArgType::PtrTo(Ctx.LongDoubleTy);
    283         default:
    284           return ArgType::Invalid();
    285       }
    286 
    287     // Char, string and scanlist.
    288     case ConversionSpecifier::cArg:
    289     case ConversionSpecifier::sArg:
    290     case ConversionSpecifier::ScanListArg:
    291       switch (LM.getKind()) {
    292         case LengthModifier::None:
    293           return ArgType::PtrTo(ArgType::AnyCharTy);
    294         case LengthModifier::AsLong:
    295           return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
    296         case LengthModifier::AsAllocate:
    297         case LengthModifier::AsMAllocate:
    298           return ArgType::PtrTo(ArgType::CStrTy);
    299         default:
    300           return ArgType::Invalid();
    301       }
    302     case ConversionSpecifier::CArg:
    303     case ConversionSpecifier::SArg:
    304       // FIXME: Mac OS X specific?
    305       switch (LM.getKind()) {
    306         case LengthModifier::None:
    307           return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
    308         case LengthModifier::AsAllocate:
    309         case LengthModifier::AsMAllocate:
    310           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
    311         default:
    312           return ArgType::Invalid();
    313       }
    314 
    315     // Pointer.
    316     case ConversionSpecifier::pArg:
    317       return ArgType::PtrTo(ArgType::CPointerTy);
    318 
    319     // Write-back.
    320     case ConversionSpecifier::nArg:
    321       switch (LM.getKind()) {
    322         case LengthModifier::None:
    323           return ArgType::PtrTo(Ctx.IntTy);
    324         case LengthModifier::AsChar:
    325           return ArgType::PtrTo(Ctx.SignedCharTy);
    326         case LengthModifier::AsShort:
    327           return ArgType::PtrTo(Ctx.ShortTy);
    328         case LengthModifier::AsLong:
    329           return ArgType::PtrTo(Ctx.LongTy);
    330         case LengthModifier::AsLongLong:
    331         case LengthModifier::AsQuad:
    332           return ArgType::PtrTo(Ctx.LongLongTy);
    333         case LengthModifier::AsIntMax:
    334           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
    335         case LengthModifier::AsSizeT:
    336           return ArgType(); // FIXME: ssize_t
    337         case LengthModifier::AsPtrDiff:
    338           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
    339         case LengthModifier::AsLongDouble:
    340           return ArgType(); // FIXME: Is this a known extension?
    341         case LengthModifier::AsAllocate:
    342         case LengthModifier::AsMAllocate:
    343           return ArgType::Invalid();
    344         }
    345 
    346     default:
    347       break;
    348   }
    349 
    350   return ArgType();
    351 }
    352 
    353 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
    354                              ASTContext &Ctx) {
    355   if (!QT->isPointerType())
    356     return false;
    357 
    358   // %n is different from other conversion specifiers; don't try to fix it.
    359   if (CS.getKind() == ConversionSpecifier::nArg)
    360     return false;
    361 
    362   QualType PT = QT->getPointeeType();
    363 
    364   // If it's an enum, get its underlying type.
    365   if (const EnumType *ETy = QT->getAs<EnumType>())
    366     QT = ETy->getDecl()->getIntegerType();
    367 
    368   const BuiltinType *BT = PT->getAs<BuiltinType>();
    369   if (!BT)
    370     return false;
    371 
    372   // Pointer to a character.
    373   if (PT->isAnyCharacterType()) {
    374     CS.setKind(ConversionSpecifier::sArg);
    375     if (PT->isWideCharType())
    376       LM.setKind(LengthModifier::AsWideChar);
    377     else
    378       LM.setKind(LengthModifier::None);
    379     return true;
    380   }
    381 
    382   // Figure out the length modifier.
    383   switch (BT->getKind()) {
    384     // no modifier
    385     case BuiltinType::UInt:
    386     case BuiltinType::Int:
    387     case BuiltinType::Float:
    388       LM.setKind(LengthModifier::None);
    389       break;
    390 
    391     // hh
    392     case BuiltinType::Char_U:
    393     case BuiltinType::UChar:
    394     case BuiltinType::Char_S:
    395     case BuiltinType::SChar:
    396       LM.setKind(LengthModifier::AsChar);
    397       break;
    398 
    399     // h
    400     case BuiltinType::Short:
    401     case BuiltinType::UShort:
    402       LM.setKind(LengthModifier::AsShort);
    403       break;
    404 
    405     // l
    406     case BuiltinType::Long:
    407     case BuiltinType::ULong:
    408     case BuiltinType::Double:
    409       LM.setKind(LengthModifier::AsLong);
    410       break;
    411 
    412     // ll
    413     case BuiltinType::LongLong:
    414     case BuiltinType::ULongLong:
    415       LM.setKind(LengthModifier::AsLongLong);
    416       break;
    417 
    418     // L
    419     case BuiltinType::LongDouble:
    420       LM.setKind(LengthModifier::AsLongDouble);
    421       break;
    422 
    423     // Don't know.
    424     default:
    425       return false;
    426   }
    427 
    428   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
    429   if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
    430     namedTypeToLengthModifier(PT, LM);
    431 
    432   // If fixing the length modifier was enough, we are done.
    433   if (hasValidLengthModifier(Ctx.getTargetInfo())) {
    434     const analyze_scanf::ArgType &AT = getArgType(Ctx);
    435     if (AT.isValid() && AT.matchesType(Ctx, QT))
    436       return true;
    437   }
    438 
    439   // Figure out the conversion specifier.
    440   if (PT->isRealFloatingType())
    441     CS.setKind(ConversionSpecifier::fArg);
    442   else if (PT->isSignedIntegerType())
    443     CS.setKind(ConversionSpecifier::dArg);
    444   else if (PT->isUnsignedIntegerType())
    445     CS.setKind(ConversionSpecifier::uArg);
    446   else
    447     llvm_unreachable("Unexpected type");
    448 
    449   return true;
    450 }
    451 
    452 void ScanfSpecifier::toString(raw_ostream &os) const {
    453   os << "%";
    454 
    455   if (usesPositionalArg())
    456     os << getPositionalArgIndex() << "$";
    457   if (SuppressAssignment)
    458     os << "*";
    459 
    460   FieldWidth.toString(os);
    461   os << LM.toString();
    462   os << CS.toString();
    463 }
    464 
    465 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
    466                                                     const char *I,
    467                                                     const char *E,
    468                                                     const LangOptions &LO) {
    469 
    470   unsigned argIndex = 0;
    471 
    472   // Keep looking for a format specifier until we have exhausted the string.
    473   while (I != E) {
    474     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
    475                                                           LO);
    476     // Did a fail-stop error of any kind occur when parsing the specifier?
    477     // If so, don't do any more processing.
    478     if (FSR.shouldStop())
    479       return true;
    480       // Did we exhaust the string or encounter an error that
    481       // we can recover from?
    482     if (!FSR.hasValue())
    483       continue;
    484       // We have a format specifier.  Pass it to the callback.
    485     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
    486                                 I - FSR.getStart())) {
    487       return true;
    488     }
    489   }
    490   assert(I == E && "Format string not exhausted");
    491   return false;
    492 }
    493