Home | History | Annotate | Download | only in Analysis
      1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Handling of format strings in scanf and friends.  The structure of format
     11 // strings for fscanf() is described in C99 7.19.6.2.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "clang/Analysis/Analyses/FormatString.h"
     16 #include "FormatStringParsing.h"
     17 #include "clang/Basic/TargetInfo.h"
     18 
     19 using clang::analyze_format_string::ArgType;
     20 using clang::analyze_format_string::FormatStringHandler;
     21 using clang::analyze_format_string::LengthModifier;
     22 using clang::analyze_format_string::OptionalAmount;
     23 using clang::analyze_format_string::ConversionSpecifier;
     24 using clang::analyze_scanf::ScanfConversionSpecifier;
     25 using clang::analyze_scanf::ScanfSpecifier;
     26 using clang::UpdateOnReturn;
     27 using namespace clang;
     28 
     29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
     30         ScanfSpecifierResult;
     31 
     32 static bool ParseScanList(FormatStringHandler &H,
     33                           ScanfConversionSpecifier &CS,
     34                           const char *&Beg, const char *E) {
     35   const char *I = Beg;
     36   const char *start = I - 1;
     37   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     38 
     39   // No more characters?
     40   if (I == E) {
     41     H.HandleIncompleteScanList(start, I);
     42     return true;
     43   }
     44 
     45   // Special case: ']' is the first character.
     46   if (*I == ']') {
     47     if (++I == E) {
     48       H.HandleIncompleteScanList(start, I - 1);
     49       return true;
     50     }
     51   }
     52 
     53   // Look for a ']' character which denotes the end of the scan list.
     54   while (*I != ']') {
     55     if (++I == E) {
     56       H.HandleIncompleteScanList(start, I - 1);
     57       return true;
     58     }
     59   }
     60 
     61   CS.setEndScanList(I);
     62   return false;
     63 }
     64 
     65 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
     66 // We can possibly refactor.
     67 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
     68                                                 const char *&Beg,
     69                                                 const char *E,
     70                                                 unsigned &argIndex,
     71                                                 const LangOptions &LO,
     72                                                 const TargetInfo &Target) {
     73 
     74   using namespace clang::analyze_scanf;
     75   const char *I = Beg;
     76   const char *Start = 0;
     77   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     78 
     79     // Look for a '%' character that indicates the start of a format specifier.
     80   for ( ; I != E ; ++I) {
     81     char c = *I;
     82     if (c == '\0') {
     83         // Detect spurious null characters, which are likely errors.
     84       H.HandleNullChar(I);
     85       return true;
     86     }
     87     if (c == '%') {
     88       Start = I++;  // Record the start of the format specifier.
     89       break;
     90     }
     91   }
     92 
     93     // No format specifier found?
     94   if (!Start)
     95     return false;
     96 
     97   if (I == E) {
     98       // No more characters left?
     99     H.HandleIncompleteSpecifier(Start, E - Start);
    100     return true;
    101   }
    102 
    103   ScanfSpecifier FS;
    104   if (ParseArgPosition(H, FS, Start, I, E))
    105     return true;
    106 
    107   if (I == E) {
    108       // No more characters left?
    109     H.HandleIncompleteSpecifier(Start, E - Start);
    110     return true;
    111   }
    112 
    113   // Look for '*' flag if it is present.
    114   if (*I == '*') {
    115     FS.setSuppressAssignment(I);
    116     if (++I == E) {
    117       H.HandleIncompleteSpecifier(Start, E - Start);
    118       return true;
    119     }
    120   }
    121 
    122   // Look for the field width (if any).  Unlike printf, this is either
    123   // a fixed integer or isn't present.
    124   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
    125   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
    126     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
    127     FS.setFieldWidth(Amt);
    128 
    129     if (I == E) {
    130       // No more characters left?
    131       H.HandleIncompleteSpecifier(Start, E - Start);
    132       return true;
    133     }
    134   }
    135 
    136   // Look for the length modifier.
    137   if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
    138       // No more characters left?
    139     H.HandleIncompleteSpecifier(Start, E - Start);
    140     return true;
    141   }
    142 
    143   // Detect spurious null characters, which are likely errors.
    144   if (*I == '\0') {
    145     H.HandleNullChar(I);
    146     return true;
    147   }
    148 
    149   // Finally, look for the conversion specifier.
    150   const char *conversionPosition = I++;
    151   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
    152   switch (*conversionPosition) {
    153     default:
    154       break;
    155     case '%': k = ConversionSpecifier::PercentArg;   break;
    156     case 'A': k = ConversionSpecifier::AArg; break;
    157     case 'E': k = ConversionSpecifier::EArg; break;
    158     case 'F': k = ConversionSpecifier::FArg; break;
    159     case 'G': k = ConversionSpecifier::GArg; break;
    160     case 'X': k = ConversionSpecifier::XArg; break;
    161     case 'a': k = ConversionSpecifier::aArg; break;
    162     case 'd': k = ConversionSpecifier::dArg; break;
    163     case 'e': k = ConversionSpecifier::eArg; break;
    164     case 'f': k = ConversionSpecifier::fArg; break;
    165     case 'g': k = ConversionSpecifier::gArg; break;
    166     case 'i': k = ConversionSpecifier::iArg; break;
    167     case 'n': k = ConversionSpecifier::nArg; break;
    168     case 'c': k = ConversionSpecifier::cArg; break;
    169     case 'C': k = ConversionSpecifier::CArg; break;
    170     case 'S': k = ConversionSpecifier::SArg; break;
    171     case '[': k = ConversionSpecifier::ScanListArg; break;
    172     case 'u': k = ConversionSpecifier::uArg; break;
    173     case 'x': k = ConversionSpecifier::xArg; break;
    174     case 'o': k = ConversionSpecifier::oArg; break;
    175     case 's': k = ConversionSpecifier::sArg; break;
    176     case 'p': k = ConversionSpecifier::pArg; break;
    177     // Apple extensions
    178       // Apple-specific
    179     case 'D':
    180       if (Target.getTriple().isOSDarwin())
    181         k = ConversionSpecifier::DArg;
    182       break;
    183     case 'O':
    184       if (Target.getTriple().isOSDarwin())
    185         k = ConversionSpecifier::OArg;
    186       break;
    187     case 'U':
    188       if (Target.getTriple().isOSDarwin())
    189         k = ConversionSpecifier::UArg;
    190       break;
    191   }
    192   ScanfConversionSpecifier CS(conversionPosition, k);
    193   if (k == ScanfConversionSpecifier::ScanListArg) {
    194     if (ParseScanList(H, CS, I, E))
    195       return true;
    196   }
    197   FS.setConversionSpecifier(CS);
    198   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
    199       && !FS.usesPositionalArg())
    200     FS.setArgIndex(argIndex++);
    201 
    202   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
    203   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
    204 
    205   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
    206     // Assume the conversion takes one argument.
    207     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
    208   }
    209   return ScanfSpecifierResult(Start, FS);
    210 }
    211 
    212 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
    213   const ScanfConversionSpecifier &CS = getConversionSpecifier();
    214 
    215   if (!CS.consumesDataArgument())
    216     return ArgType::Invalid();
    217 
    218   switch(CS.getKind()) {
    219     // Signed int.
    220     case ConversionSpecifier::dArg:
    221     case ConversionSpecifier::DArg:
    222     case ConversionSpecifier::iArg:
    223       switch (LM.getKind()) {
    224         case LengthModifier::None:
    225           return ArgType::PtrTo(Ctx.IntTy);
    226         case LengthModifier::AsChar:
    227           return ArgType::PtrTo(ArgType::AnyCharTy);
    228         case LengthModifier::AsShort:
    229           return ArgType::PtrTo(Ctx.ShortTy);
    230         case LengthModifier::AsLong:
    231           return ArgType::PtrTo(Ctx.LongTy);
    232         case LengthModifier::AsLongLong:
    233         case LengthModifier::AsQuad:
    234           return ArgType::PtrTo(Ctx.LongLongTy);
    235         case LengthModifier::AsIntMax:
    236           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
    237         case LengthModifier::AsSizeT:
    238           // FIXME: ssize_t.
    239           return ArgType();
    240         case LengthModifier::AsPtrDiff:
    241           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
    242         case LengthModifier::AsLongDouble:
    243           // GNU extension.
    244           return ArgType::PtrTo(Ctx.LongLongTy);
    245         case LengthModifier::AsAllocate:
    246           return ArgType::Invalid();
    247         case LengthModifier::AsMAllocate:
    248           return ArgType::Invalid();
    249       }
    250 
    251     // Unsigned int.
    252     case ConversionSpecifier::oArg:
    253     case ConversionSpecifier::OArg:
    254     case ConversionSpecifier::uArg:
    255     case ConversionSpecifier::UArg:
    256     case ConversionSpecifier::xArg:
    257     case ConversionSpecifier::XArg:
    258       switch (LM.getKind()) {
    259         case LengthModifier::None:
    260           return ArgType::PtrTo(Ctx.UnsignedIntTy);
    261         case LengthModifier::AsChar:
    262           return ArgType::PtrTo(Ctx.UnsignedCharTy);
    263         case LengthModifier::AsShort:
    264           return ArgType::PtrTo(Ctx.UnsignedShortTy);
    265         case LengthModifier::AsLong:
    266           return ArgType::PtrTo(Ctx.UnsignedLongTy);
    267         case LengthModifier::AsLongLong:
    268         case LengthModifier::AsQuad:
    269           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
    270         case LengthModifier::AsIntMax:
    271           return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
    272         case LengthModifier::AsSizeT:
    273           return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
    274         case LengthModifier::AsPtrDiff:
    275           // FIXME: Unsigned version of ptrdiff_t?
    276           return ArgType();
    277         case LengthModifier::AsLongDouble:
    278           // GNU extension.
    279           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
    280         case LengthModifier::AsAllocate:
    281           return ArgType::Invalid();
    282         case LengthModifier::AsMAllocate:
    283           return ArgType::Invalid();
    284       }
    285 
    286     // Float.
    287     case ConversionSpecifier::aArg:
    288     case ConversionSpecifier::AArg:
    289     case ConversionSpecifier::eArg:
    290     case ConversionSpecifier::EArg:
    291     case ConversionSpecifier::fArg:
    292     case ConversionSpecifier::FArg:
    293     case ConversionSpecifier::gArg:
    294     case ConversionSpecifier::GArg:
    295       switch (LM.getKind()) {
    296         case LengthModifier::None:
    297           return ArgType::PtrTo(Ctx.FloatTy);
    298         case LengthModifier::AsLong:
    299           return ArgType::PtrTo(Ctx.DoubleTy);
    300         case LengthModifier::AsLongDouble:
    301           return ArgType::PtrTo(Ctx.LongDoubleTy);
    302         default:
    303           return ArgType::Invalid();
    304       }
    305 
    306     // Char, string and scanlist.
    307     case ConversionSpecifier::cArg:
    308     case ConversionSpecifier::sArg:
    309     case ConversionSpecifier::ScanListArg:
    310       switch (LM.getKind()) {
    311         case LengthModifier::None:
    312           return ArgType::PtrTo(ArgType::AnyCharTy);
    313         case LengthModifier::AsLong:
    314           return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
    315         case LengthModifier::AsAllocate:
    316         case LengthModifier::AsMAllocate:
    317           return ArgType::PtrTo(ArgType::CStrTy);
    318         default:
    319           return ArgType::Invalid();
    320       }
    321     case ConversionSpecifier::CArg:
    322     case ConversionSpecifier::SArg:
    323       // FIXME: Mac OS X specific?
    324       switch (LM.getKind()) {
    325         case LengthModifier::None:
    326           return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
    327         case LengthModifier::AsAllocate:
    328         case LengthModifier::AsMAllocate:
    329           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
    330         default:
    331           return ArgType::Invalid();
    332       }
    333 
    334     // Pointer.
    335     case ConversionSpecifier::pArg:
    336       return ArgType::PtrTo(ArgType::CPointerTy);
    337 
    338     // Write-back.
    339     case ConversionSpecifier::nArg:
    340       switch (LM.getKind()) {
    341         case LengthModifier::None:
    342           return ArgType::PtrTo(Ctx.IntTy);
    343         case LengthModifier::AsChar:
    344           return ArgType::PtrTo(Ctx.SignedCharTy);
    345         case LengthModifier::AsShort:
    346           return ArgType::PtrTo(Ctx.ShortTy);
    347         case LengthModifier::AsLong:
    348           return ArgType::PtrTo(Ctx.LongTy);
    349         case LengthModifier::AsLongLong:
    350         case LengthModifier::AsQuad:
    351           return ArgType::PtrTo(Ctx.LongLongTy);
    352         case LengthModifier::AsIntMax:
    353           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
    354         case LengthModifier::AsSizeT:
    355           return ArgType(); // FIXME: ssize_t
    356         case LengthModifier::AsPtrDiff:
    357           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
    358         case LengthModifier::AsLongDouble:
    359           return ArgType(); // FIXME: Is this a known extension?
    360         case LengthModifier::AsAllocate:
    361         case LengthModifier::AsMAllocate:
    362           return ArgType::Invalid();
    363         }
    364 
    365     default:
    366       break;
    367   }
    368 
    369   return ArgType();
    370 }
    371 
    372 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
    373                              ASTContext &Ctx) {
    374   if (!QT->isPointerType())
    375     return false;
    376 
    377   // %n is different from other conversion specifiers; don't try to fix it.
    378   if (CS.getKind() == ConversionSpecifier::nArg)
    379     return false;
    380 
    381   QualType PT = QT->getPointeeType();
    382 
    383   // If it's an enum, get its underlying type.
    384   if (const EnumType *ETy = QT->getAs<EnumType>())
    385     QT = ETy->getDecl()->getIntegerType();
    386 
    387   const BuiltinType *BT = PT->getAs<BuiltinType>();
    388   if (!BT)
    389     return false;
    390 
    391   // Pointer to a character.
    392   if (PT->isAnyCharacterType()) {
    393     CS.setKind(ConversionSpecifier::sArg);
    394     if (PT->isWideCharType())
    395       LM.setKind(LengthModifier::AsWideChar);
    396     else
    397       LM.setKind(LengthModifier::None);
    398     return true;
    399   }
    400 
    401   // Figure out the length modifier.
    402   switch (BT->getKind()) {
    403     // no modifier
    404     case BuiltinType::UInt:
    405     case BuiltinType::Int:
    406     case BuiltinType::Float:
    407       LM.setKind(LengthModifier::None);
    408       break;
    409 
    410     // hh
    411     case BuiltinType::Char_U:
    412     case BuiltinType::UChar:
    413     case BuiltinType::Char_S:
    414     case BuiltinType::SChar:
    415       LM.setKind(LengthModifier::AsChar);
    416       break;
    417 
    418     // h
    419     case BuiltinType::Short:
    420     case BuiltinType::UShort:
    421       LM.setKind(LengthModifier::AsShort);
    422       break;
    423 
    424     // l
    425     case BuiltinType::Long:
    426     case BuiltinType::ULong:
    427     case BuiltinType::Double:
    428       LM.setKind(LengthModifier::AsLong);
    429       break;
    430 
    431     // ll
    432     case BuiltinType::LongLong:
    433     case BuiltinType::ULongLong:
    434       LM.setKind(LengthModifier::AsLongLong);
    435       break;
    436 
    437     // L
    438     case BuiltinType::LongDouble:
    439       LM.setKind(LengthModifier::AsLongDouble);
    440       break;
    441 
    442     // Don't know.
    443     default:
    444       return false;
    445   }
    446 
    447   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
    448   if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
    449     namedTypeToLengthModifier(PT, LM);
    450 
    451   // If fixing the length modifier was enough, we are done.
    452   if (hasValidLengthModifier(Ctx.getTargetInfo())) {
    453     const analyze_scanf::ArgType &AT = getArgType(Ctx);
    454     if (AT.isValid() && AT.matchesType(Ctx, QT))
    455       return true;
    456   }
    457 
    458   // Figure out the conversion specifier.
    459   if (PT->isRealFloatingType())
    460     CS.setKind(ConversionSpecifier::fArg);
    461   else if (PT->isSignedIntegerType())
    462     CS.setKind(ConversionSpecifier::dArg);
    463   else if (PT->isUnsignedIntegerType())
    464     CS.setKind(ConversionSpecifier::uArg);
    465   else
    466     llvm_unreachable("Unexpected type");
    467 
    468   return true;
    469 }
    470 
    471 void ScanfSpecifier::toString(raw_ostream &os) const {
    472   os << "%";
    473 
    474   if (usesPositionalArg())
    475     os << getPositionalArgIndex() << "$";
    476   if (SuppressAssignment)
    477     os << "*";
    478 
    479   FieldWidth.toString(os);
    480   os << LM.toString();
    481   os << CS.toString();
    482 }
    483 
    484 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
    485                                                     const char *I,
    486                                                     const char *E,
    487                                                     const LangOptions &LO,
    488                                                     const TargetInfo &Target) {
    489 
    490   unsigned argIndex = 0;
    491 
    492   // Keep looking for a format specifier until we have exhausted the string.
    493   while (I != E) {
    494     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
    495                                                           LO, Target);
    496     // Did a fail-stop error of any kind occur when parsing the specifier?
    497     // If so, don't do any more processing.
    498     if (FSR.shouldStop())
    499       return true;
    500       // Did we exhaust the string or encounter an error that
    501       // we can recover from?
    502     if (!FSR.hasValue())
    503       continue;
    504       // We have a format specifier.  Pass it to the callback.
    505     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
    506                                 I - FSR.getStart())) {
    507       return true;
    508     }
    509   }
    510   assert(I == E && "Format string not exhausted");
    511   return false;
    512 }
    513