Home | History | Annotate | Download | only in Analysis
      1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Handling of format strings in scanf and friends.  The structure of format
     11 // strings for fscanf() is described in C99 7.19.6.2.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "clang/Analysis/Analyses/FormatString.h"
     16 #include "FormatStringParsing.h"
     17 #include "clang/Basic/TargetInfo.h"
     18 
     19 using clang::analyze_format_string::ArgType;
     20 using clang::analyze_format_string::FormatStringHandler;
     21 using clang::analyze_format_string::LengthModifier;
     22 using clang::analyze_format_string::OptionalAmount;
     23 using clang::analyze_format_string::ConversionSpecifier;
     24 using clang::analyze_scanf::ScanfConversionSpecifier;
     25 using clang::analyze_scanf::ScanfSpecifier;
     26 using clang::UpdateOnReturn;
     27 using namespace clang;
     28 
     29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
     30         ScanfSpecifierResult;
     31 
     32 static bool ParseScanList(FormatStringHandler &H,
     33                           ScanfConversionSpecifier &CS,
     34                           const char *&Beg, const char *E) {
     35   const char *I = Beg;
     36   const char *start = I - 1;
     37   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     38 
     39   // No more characters?
     40   if (I == E) {
     41     H.HandleIncompleteScanList(start, I);
     42     return true;
     43   }
     44 
     45   // Special case: ']' is the first character.
     46   if (*I == ']') {
     47     if (++I == E) {
     48       H.HandleIncompleteScanList(start, I - 1);
     49       return true;
     50     }
     51   }
     52 
     53   // Special case: "^]" are the first characters.
     54   if (I + 1 != E && I[0] == '^' && I[1] == ']') {
     55     I += 2;
     56     if (I == E) {
     57       H.HandleIncompleteScanList(start, I - 1);
     58       return true;
     59     }
     60   }
     61 
     62   // Look for a ']' character which denotes the end of the scan list.
     63   while (*I != ']') {
     64     if (++I == E) {
     65       H.HandleIncompleteScanList(start, I - 1);
     66       return true;
     67     }
     68   }
     69 
     70   CS.setEndScanList(I);
     71   return false;
     72 }
     73 
     74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
     75 // We can possibly refactor.
     76 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
     77                                                 const char *&Beg,
     78                                                 const char *E,
     79                                                 unsigned &argIndex,
     80                                                 const LangOptions &LO,
     81                                                 const TargetInfo &Target) {
     82   using namespace clang::analyze_format_string;
     83   using namespace clang::analyze_scanf;
     84   const char *I = Beg;
     85   const char *Start = nullptr;
     86   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     87 
     88     // Look for a '%' character that indicates the start of a format specifier.
     89   for ( ; I != E ; ++I) {
     90     char c = *I;
     91     if (c == '\0') {
     92         // Detect spurious null characters, which are likely errors.
     93       H.HandleNullChar(I);
     94       return true;
     95     }
     96     if (c == '%') {
     97       Start = I++;  // Record the start of the format specifier.
     98       break;
     99     }
    100   }
    101 
    102     // No format specifier found?
    103   if (!Start)
    104     return false;
    105 
    106   if (I == E) {
    107       // No more characters left?
    108     H.HandleIncompleteSpecifier(Start, E - Start);
    109     return true;
    110   }
    111 
    112   ScanfSpecifier FS;
    113   if (ParseArgPosition(H, FS, Start, I, E))
    114     return true;
    115 
    116   if (I == E) {
    117       // No more characters left?
    118     H.HandleIncompleteSpecifier(Start, E - Start);
    119     return true;
    120   }
    121 
    122   // Look for '*' flag if it is present.
    123   if (*I == '*') {
    124     FS.setSuppressAssignment(I);
    125     if (++I == E) {
    126       H.HandleIncompleteSpecifier(Start, E - Start);
    127       return true;
    128     }
    129   }
    130 
    131   // Look for the field width (if any).  Unlike printf, this is either
    132   // a fixed integer or isn't present.
    133   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
    134   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
    135     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
    136     FS.setFieldWidth(Amt);
    137 
    138     if (I == E) {
    139       // No more characters left?
    140       H.HandleIncompleteSpecifier(Start, E - Start);
    141       return true;
    142     }
    143   }
    144 
    145   // Look for the length modifier.
    146   if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
    147       // No more characters left?
    148     H.HandleIncompleteSpecifier(Start, E - Start);
    149     return true;
    150   }
    151 
    152   // Detect spurious null characters, which are likely errors.
    153   if (*I == '\0') {
    154     H.HandleNullChar(I);
    155     return true;
    156   }
    157 
    158   // Finally, look for the conversion specifier.
    159   const char *conversionPosition = I++;
    160   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
    161   switch (*conversionPosition) {
    162     default:
    163       break;
    164     case '%': k = ConversionSpecifier::PercentArg;   break;
    165     case 'A': k = ConversionSpecifier::AArg; break;
    166     case 'E': k = ConversionSpecifier::EArg; break;
    167     case 'F': k = ConversionSpecifier::FArg; break;
    168     case 'G': k = ConversionSpecifier::GArg; break;
    169     case 'X': k = ConversionSpecifier::XArg; break;
    170     case 'a': k = ConversionSpecifier::aArg; break;
    171     case 'd': k = ConversionSpecifier::dArg; break;
    172     case 'e': k = ConversionSpecifier::eArg; break;
    173     case 'f': k = ConversionSpecifier::fArg; break;
    174     case 'g': k = ConversionSpecifier::gArg; break;
    175     case 'i': k = ConversionSpecifier::iArg; break;
    176     case 'n': k = ConversionSpecifier::nArg; break;
    177     case 'c': k = ConversionSpecifier::cArg; break;
    178     case 'C': k = ConversionSpecifier::CArg; break;
    179     case 'S': k = ConversionSpecifier::SArg; break;
    180     case '[': k = ConversionSpecifier::ScanListArg; break;
    181     case 'u': k = ConversionSpecifier::uArg; break;
    182     case 'x': k = ConversionSpecifier::xArg; break;
    183     case 'o': k = ConversionSpecifier::oArg; break;
    184     case 's': k = ConversionSpecifier::sArg; break;
    185     case 'p': k = ConversionSpecifier::pArg; break;
    186     // Apple extensions
    187       // Apple-specific
    188     case 'D':
    189       if (Target.getTriple().isOSDarwin())
    190         k = ConversionSpecifier::DArg;
    191       break;
    192     case 'O':
    193       if (Target.getTriple().isOSDarwin())
    194         k = ConversionSpecifier::OArg;
    195       break;
    196     case 'U':
    197       if (Target.getTriple().isOSDarwin())
    198         k = ConversionSpecifier::UArg;
    199       break;
    200   }
    201   ScanfConversionSpecifier CS(conversionPosition, k);
    202   if (k == ScanfConversionSpecifier::ScanListArg) {
    203     if (ParseScanList(H, CS, I, E))
    204       return true;
    205   }
    206   FS.setConversionSpecifier(CS);
    207   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
    208       && !FS.usesPositionalArg())
    209     FS.setArgIndex(argIndex++);
    210 
    211   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
    212   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
    213 
    214   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
    215     unsigned Len = I - Beg;
    216     if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
    217       CS.setEndScanList(Beg + Len);
    218       FS.setConversionSpecifier(CS);
    219     }
    220     // Assume the conversion takes one argument.
    221     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
    222   }
    223   return ScanfSpecifierResult(Start, FS);
    224 }
    225 
    226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
    227   const ScanfConversionSpecifier &CS = getConversionSpecifier();
    228 
    229   if (!CS.consumesDataArgument())
    230     return ArgType::Invalid();
    231 
    232   switch(CS.getKind()) {
    233     // Signed int.
    234     case ConversionSpecifier::dArg:
    235     case ConversionSpecifier::DArg:
    236     case ConversionSpecifier::iArg:
    237       switch (LM.getKind()) {
    238         case LengthModifier::None:
    239           return ArgType::PtrTo(Ctx.IntTy);
    240         case LengthModifier::AsChar:
    241           return ArgType::PtrTo(ArgType::AnyCharTy);
    242         case LengthModifier::AsShort:
    243           return ArgType::PtrTo(Ctx.ShortTy);
    244         case LengthModifier::AsLong:
    245           return ArgType::PtrTo(Ctx.LongTy);
    246         case LengthModifier::AsLongLong:
    247         case LengthModifier::AsQuad:
    248           return ArgType::PtrTo(Ctx.LongLongTy);
    249         case LengthModifier::AsInt64:
    250           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
    251         case LengthModifier::AsIntMax:
    252           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
    253         case LengthModifier::AsSizeT:
    254           // FIXME: ssize_t.
    255           return ArgType();
    256         case LengthModifier::AsPtrDiff:
    257           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
    258         case LengthModifier::AsLongDouble:
    259           // GNU extension.
    260           return ArgType::PtrTo(Ctx.LongLongTy);
    261         case LengthModifier::AsAllocate:
    262         case LengthModifier::AsMAllocate:
    263         case LengthModifier::AsInt32:
    264         case LengthModifier::AsInt3264:
    265         case LengthModifier::AsWide:
    266           return ArgType::Invalid();
    267       }
    268 
    269     // Unsigned int.
    270     case ConversionSpecifier::oArg:
    271     case ConversionSpecifier::OArg:
    272     case ConversionSpecifier::uArg:
    273     case ConversionSpecifier::UArg:
    274     case ConversionSpecifier::xArg:
    275     case ConversionSpecifier::XArg:
    276       switch (LM.getKind()) {
    277         case LengthModifier::None:
    278           return ArgType::PtrTo(Ctx.UnsignedIntTy);
    279         case LengthModifier::AsChar:
    280           return ArgType::PtrTo(Ctx.UnsignedCharTy);
    281         case LengthModifier::AsShort:
    282           return ArgType::PtrTo(Ctx.UnsignedShortTy);
    283         case LengthModifier::AsLong:
    284           return ArgType::PtrTo(Ctx.UnsignedLongTy);
    285         case LengthModifier::AsLongLong:
    286         case LengthModifier::AsQuad:
    287           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
    288         case LengthModifier::AsInt64:
    289           return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
    290         case LengthModifier::AsIntMax:
    291           return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
    292         case LengthModifier::AsSizeT:
    293           return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
    294         case LengthModifier::AsPtrDiff:
    295           // FIXME: Unsigned version of ptrdiff_t?
    296           return ArgType();
    297         case LengthModifier::AsLongDouble:
    298           // GNU extension.
    299           return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
    300         case LengthModifier::AsAllocate:
    301         case LengthModifier::AsMAllocate:
    302         case LengthModifier::AsInt32:
    303         case LengthModifier::AsInt3264:
    304         case LengthModifier::AsWide:
    305           return ArgType::Invalid();
    306       }
    307 
    308     // Float.
    309     case ConversionSpecifier::aArg:
    310     case ConversionSpecifier::AArg:
    311     case ConversionSpecifier::eArg:
    312     case ConversionSpecifier::EArg:
    313     case ConversionSpecifier::fArg:
    314     case ConversionSpecifier::FArg:
    315     case ConversionSpecifier::gArg:
    316     case ConversionSpecifier::GArg:
    317       switch (LM.getKind()) {
    318         case LengthModifier::None:
    319           return ArgType::PtrTo(Ctx.FloatTy);
    320         case LengthModifier::AsLong:
    321           return ArgType::PtrTo(Ctx.DoubleTy);
    322         case LengthModifier::AsLongDouble:
    323           return ArgType::PtrTo(Ctx.LongDoubleTy);
    324         default:
    325           return ArgType::Invalid();
    326       }
    327 
    328     // Char, string and scanlist.
    329     case ConversionSpecifier::cArg:
    330     case ConversionSpecifier::sArg:
    331     case ConversionSpecifier::ScanListArg:
    332       switch (LM.getKind()) {
    333         case LengthModifier::None:
    334           return ArgType::PtrTo(ArgType::AnyCharTy);
    335         case LengthModifier::AsLong:
    336         case LengthModifier::AsWide:
    337           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
    338         case LengthModifier::AsAllocate:
    339         case LengthModifier::AsMAllocate:
    340           return ArgType::PtrTo(ArgType::CStrTy);
    341         case LengthModifier::AsShort:
    342           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
    343             return ArgType::PtrTo(ArgType::AnyCharTy);
    344         default:
    345           return ArgType::Invalid();
    346       }
    347     case ConversionSpecifier::CArg:
    348     case ConversionSpecifier::SArg:
    349       // FIXME: Mac OS X specific?
    350       switch (LM.getKind()) {
    351         case LengthModifier::None:
    352         case LengthModifier::AsWide:
    353           return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
    354         case LengthModifier::AsAllocate:
    355         case LengthModifier::AsMAllocate:
    356           return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
    357         case LengthModifier::AsShort:
    358           if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
    359             return ArgType::PtrTo(ArgType::AnyCharTy);
    360         default:
    361           return ArgType::Invalid();
    362       }
    363 
    364     // Pointer.
    365     case ConversionSpecifier::pArg:
    366       return ArgType::PtrTo(ArgType::CPointerTy);
    367 
    368     // Write-back.
    369     case ConversionSpecifier::nArg:
    370       switch (LM.getKind()) {
    371         case LengthModifier::None:
    372           return ArgType::PtrTo(Ctx.IntTy);
    373         case LengthModifier::AsChar:
    374           return ArgType::PtrTo(Ctx.SignedCharTy);
    375         case LengthModifier::AsShort:
    376           return ArgType::PtrTo(Ctx.ShortTy);
    377         case LengthModifier::AsLong:
    378           return ArgType::PtrTo(Ctx.LongTy);
    379         case LengthModifier::AsLongLong:
    380         case LengthModifier::AsQuad:
    381           return ArgType::PtrTo(Ctx.LongLongTy);
    382         case LengthModifier::AsInt64:
    383           return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
    384         case LengthModifier::AsIntMax:
    385           return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
    386         case LengthModifier::AsSizeT:
    387           return ArgType(); // FIXME: ssize_t
    388         case LengthModifier::AsPtrDiff:
    389           return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
    390         case LengthModifier::AsLongDouble:
    391           return ArgType(); // FIXME: Is this a known extension?
    392         case LengthModifier::AsAllocate:
    393         case LengthModifier::AsMAllocate:
    394         case LengthModifier::AsInt32:
    395         case LengthModifier::AsInt3264:
    396         case LengthModifier::AsWide:
    397           return ArgType::Invalid();
    398         }
    399 
    400     default:
    401       break;
    402   }
    403 
    404   return ArgType();
    405 }
    406 
    407 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
    408                              const LangOptions &LangOpt,
    409                              ASTContext &Ctx) {
    410 
    411   // %n is different from other conversion specifiers; don't try to fix it.
    412   if (CS.getKind() == ConversionSpecifier::nArg)
    413     return false;
    414 
    415   if (!QT->isPointerType())
    416     return false;
    417 
    418   QualType PT = QT->getPointeeType();
    419 
    420   // If it's an enum, get its underlying type.
    421   if (const EnumType *ETy = PT->getAs<EnumType>())
    422     PT = ETy->getDecl()->getIntegerType();
    423 
    424   const BuiltinType *BT = PT->getAs<BuiltinType>();
    425   if (!BT)
    426     return false;
    427 
    428   // Pointer to a character.
    429   if (PT->isAnyCharacterType()) {
    430     CS.setKind(ConversionSpecifier::sArg);
    431     if (PT->isWideCharType())
    432       LM.setKind(LengthModifier::AsWideChar);
    433     else
    434       LM.setKind(LengthModifier::None);
    435 
    436     // If we know the target array length, we can use it as a field width.
    437     if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
    438       if (CAT->getSizeModifier() == ArrayType::Normal)
    439         FieldWidth = OptionalAmount(OptionalAmount::Constant,
    440                                     CAT->getSize().getZExtValue() - 1,
    441                                     "", 0, false);
    442 
    443     }
    444     return true;
    445   }
    446 
    447   // Figure out the length modifier.
    448   switch (BT->getKind()) {
    449     // no modifier
    450     case BuiltinType::UInt:
    451     case BuiltinType::Int:
    452     case BuiltinType::Float:
    453       LM.setKind(LengthModifier::None);
    454       break;
    455 
    456     // hh
    457     case BuiltinType::Char_U:
    458     case BuiltinType::UChar:
    459     case BuiltinType::Char_S:
    460     case BuiltinType::SChar:
    461       LM.setKind(LengthModifier::AsChar);
    462       break;
    463 
    464     // h
    465     case BuiltinType::Short:
    466     case BuiltinType::UShort:
    467       LM.setKind(LengthModifier::AsShort);
    468       break;
    469 
    470     // l
    471     case BuiltinType::Long:
    472     case BuiltinType::ULong:
    473     case BuiltinType::Double:
    474       LM.setKind(LengthModifier::AsLong);
    475       break;
    476 
    477     // ll
    478     case BuiltinType::LongLong:
    479     case BuiltinType::ULongLong:
    480       LM.setKind(LengthModifier::AsLongLong);
    481       break;
    482 
    483     // L
    484     case BuiltinType::LongDouble:
    485       LM.setKind(LengthModifier::AsLongDouble);
    486       break;
    487 
    488     // Don't know.
    489     default:
    490       return false;
    491   }
    492 
    493   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
    494   if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
    495     namedTypeToLengthModifier(PT, LM);
    496 
    497   // If fixing the length modifier was enough, we are done.
    498   if (hasValidLengthModifier(Ctx.getTargetInfo())) {
    499     const analyze_scanf::ArgType &AT = getArgType(Ctx);
    500     if (AT.isValid() && AT.matchesType(Ctx, QT))
    501       return true;
    502   }
    503 
    504   // Figure out the conversion specifier.
    505   if (PT->isRealFloatingType())
    506     CS.setKind(ConversionSpecifier::fArg);
    507   else if (PT->isSignedIntegerType())
    508     CS.setKind(ConversionSpecifier::dArg);
    509   else if (PT->isUnsignedIntegerType())
    510     CS.setKind(ConversionSpecifier::uArg);
    511   else
    512     llvm_unreachable("Unexpected type");
    513 
    514   return true;
    515 }
    516 
    517 void ScanfSpecifier::toString(raw_ostream &os) const {
    518   os << "%";
    519 
    520   if (usesPositionalArg())
    521     os << getPositionalArgIndex() << "$";
    522   if (SuppressAssignment)
    523     os << "*";
    524 
    525   FieldWidth.toString(os);
    526   os << LM.toString();
    527   os << CS.toString();
    528 }
    529 
    530 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
    531                                                     const char *I,
    532                                                     const char *E,
    533                                                     const LangOptions &LO,
    534                                                     const TargetInfo &Target) {
    535 
    536   unsigned argIndex = 0;
    537 
    538   // Keep looking for a format specifier until we have exhausted the string.
    539   while (I != E) {
    540     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
    541                                                           LO, Target);
    542     // Did a fail-stop error of any kind occur when parsing the specifier?
    543     // If so, don't do any more processing.
    544     if (FSR.shouldStop())
    545       return true;
    546       // Did we exhaust the string or encounter an error that
    547       // we can recover from?
    548     if (!FSR.hasValue())
    549       continue;
    550       // We have a format specifier.  Pass it to the callback.
    551     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
    552                                 I - FSR.getStart())) {
    553       return true;
    554     }
    555   }
    556   assert(I == E && "Format string not exhausted");
    557   return false;
    558 }
    559