Home | History | Annotate | Download | only in Analysis
      1 // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Shared details for processing format strings of printf and scanf
     11 // (and friends).
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "FormatStringParsing.h"
     16 #include "clang/Basic/LangOptions.h"
     17 
     18 using clang::analyze_format_string::ArgTypeResult;
     19 using clang::analyze_format_string::FormatStringHandler;
     20 using clang::analyze_format_string::FormatSpecifier;
     21 using clang::analyze_format_string::LengthModifier;
     22 using clang::analyze_format_string::OptionalAmount;
     23 using clang::analyze_format_string::PositionContext;
     24 using clang::analyze_format_string::ConversionSpecifier;
     25 using namespace clang;
     26 
// Key function to FormatStringHandler.
// The destructor is intentionally defined out of line, here, with an empty
// body.  NOTE(review): presumably this anchors the class's vtable in this
// translation unit -- confirm against the class declaration.
FormatStringHandler::~FormatStringHandler() {}
     29 
     30 //===----------------------------------------------------------------------===//
// Functions for parsing the components of a format string that are shared
// by printf and scanf format strings.
     33 //===----------------------------------------------------------------------===//
     34 
     35 OptionalAmount
     36 clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
     37   const char *I = Beg;
     38   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     39 
     40   unsigned accumulator = 0;
     41   bool hasDigits = false;
     42 
     43   for ( ; I != E; ++I) {
     44     char c = *I;
     45     if (c >= '0' && c <= '9') {
     46       hasDigits = true;
     47       accumulator = (accumulator * 10) + (c - '0');
     48       continue;
     49     }
     50 
     51     if (hasDigits)
     52       return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
     53           false);
     54 
     55     break;
     56   }
     57 
     58   return OptionalAmount();
     59 }
     60 
     61 OptionalAmount
     62 clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
     63                                                      const char *E,
     64                                                      unsigned &argIndex) {
     65   if (*Beg == '*') {
     66     ++Beg;
     67     return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
     68   }
     69 
     70   return ParseAmount(Beg, E);
     71 }
     72 
     73 OptionalAmount
     74 clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
     75                                                   const char *Start,
     76                                                   const char *&Beg,
     77                                                   const char *E,
     78                                                   PositionContext p) {
     79   if (*Beg == '*') {
     80     const char *I = Beg + 1;
     81     const OptionalAmount &Amt = ParseAmount(I, E);
     82 
     83     if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
     84       H.HandleInvalidPosition(Beg, I - Beg, p);
     85       return OptionalAmount(false);
     86     }
     87 
     88     if (I == E) {
     89       // No more characters left?
     90       H.HandleIncompleteSpecifier(Start, E - Start);
     91       return OptionalAmount(false);
     92     }
     93 
     94     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
     95 
     96     if (*I == '$') {
     97       // Handle positional arguments
     98 
     99       // Special case: '*0$', since this is an easy mistake.
    100       if (Amt.getConstantAmount() == 0) {
    101         H.HandleZeroPosition(Beg, I - Beg + 1);
    102         return OptionalAmount(false);
    103       }
    104 
    105       const char *Tmp = Beg;
    106       Beg = ++I;
    107 
    108       return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
    109                             Tmp, 0, true);
    110     }
    111 
    112     H.HandleInvalidPosition(Beg, I - Beg, p);
    113     return OptionalAmount(false);
    114   }
    115 
    116   return ParseAmount(Beg, E);
    117 }
    118 
    119 
    120 bool
    121 clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
    122                                               FormatSpecifier &CS,
    123                                               const char *Start,
    124                                               const char *&Beg, const char *E,
    125                                               unsigned *argIndex) {
    126   // FIXME: Support negative field widths.
    127   if (argIndex) {
    128     CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
    129   }
    130   else {
    131     const OptionalAmount Amt =
    132       ParsePositionAmount(H, Start, Beg, E,
    133                           analyze_format_string::FieldWidthPos);
    134 
    135     if (Amt.isInvalid())
    136       return true;
    137     CS.setFieldWidth(Amt);
    138   }
    139   return false;
    140 }
    141 
    142 bool
    143 clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
    144                                                FormatSpecifier &FS,
    145                                                const char *Start,
    146                                                const char *&Beg,
    147                                                const char *E) {
    148   const char *I = Beg;
    149 
    150   const OptionalAmount &Amt = ParseAmount(I, E);
    151 
    152   if (I == E) {
    153     // No more characters left?
    154     H.HandleIncompleteSpecifier(Start, E - Start);
    155     return true;
    156   }
    157 
    158   if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
    159     // Warn that positional arguments are non-standard.
    160     H.HandlePosition(Start, I - Start);
    161 
    162     // Special case: '%0$', since this is an easy mistake.
    163     if (Amt.getConstantAmount() == 0) {
    164       H.HandleZeroPosition(Start, I - Start);
    165       return true;
    166     }
    167 
    168     FS.setArgIndex(Amt.getConstantAmount() - 1);
    169     FS.setUsesPositionalArg();
    170     // Update the caller's pointer if we decided to consume
    171     // these characters.
    172     Beg = I;
    173     return false;
    174   }
    175 
    176   return false;
    177 }
    178 
    179 bool
    180 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
    181                                                   const char *&I,
    182                                                   const char *E,
    183                                                   const LangOptions &LO,
    184                                                   bool IsScanf) {
    185   LengthModifier::Kind lmKind = LengthModifier::None;
    186   const char *lmPosition = I;
    187   switch (*I) {
    188     default:
    189       return false;
    190     case 'h':
    191       ++I;
    192       lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
    193                                      : LengthModifier::AsShort;
    194       break;
    195     case 'l':
    196       ++I;
    197       lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
    198                                      : LengthModifier::AsLong;
    199       break;
    200     case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
    201     case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
    202     case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
    203     case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
    204     case 'q': lmKind = LengthModifier::AsQuad;       ++I; break;
    205     case 'a':
    206       if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) {
    207         // For scanf in C90, look at the next character to see if this should
    208         // be parsed as the GNU extension 'a' length modifier. If not, this
    209         // will be parsed as a conversion specifier.
    210         ++I;
    211         if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
    212           lmKind = LengthModifier::AsAllocate;
    213           break;
    214         }
    215         --I;
    216       }
    217       return false;
    218     case 'm':
    219       if (IsScanf) {
    220         lmKind = LengthModifier::AsMAllocate;
    221         ++I;
    222         break;
    223       }
    224       return false;
    225   }
    226   LengthModifier lm(lmPosition, lmKind);
    227   FS.setLengthModifier(lm);
    228   return true;
    229 }
    230 
    231 //===----------------------------------------------------------------------===//
    232 // Methods on ArgTypeResult.
    233 //===----------------------------------------------------------------------===//
    234 
    235 bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
    236   switch (K) {
    237     case InvalidTy:
    238       llvm_unreachable("ArgTypeResult must be valid");
    239 
    240     case UnknownTy:
    241       return true;
    242 
    243     case AnyCharTy: {
    244       if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
    245         switch (BT->getKind()) {
    246           default:
    247             break;
    248           case BuiltinType::Char_S:
    249           case BuiltinType::SChar:
    250           case BuiltinType::UChar:
    251           case BuiltinType::Char_U:
    252             return true;
    253         }
    254       return false;
    255     }
    256 
    257     case SpecificTy: {
    258       argTy = C.getCanonicalType(argTy).getUnqualifiedType();
    259       if (T == argTy)
    260         return true;
    261       // Check for "compatible types".
    262       if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
    263         switch (BT->getKind()) {
    264           default:
    265             break;
    266           case BuiltinType::Char_S:
    267           case BuiltinType::SChar:
    268             return T == C.UnsignedCharTy;
    269           case BuiltinType::Char_U:
    270           case BuiltinType::UChar:
    271             return T == C.SignedCharTy;
    272           case BuiltinType::Short:
    273             return T == C.UnsignedShortTy;
    274           case BuiltinType::UShort:
    275             return T == C.ShortTy;
    276           case BuiltinType::Int:
    277             return T == C.UnsignedIntTy;
    278           case BuiltinType::UInt:
    279             return T == C.IntTy;
    280           case BuiltinType::Long:
    281             return T == C.UnsignedLongTy;
    282           case BuiltinType::ULong:
    283             return T == C.LongTy;
    284           case BuiltinType::LongLong:
    285             return T == C.UnsignedLongLongTy;
    286           case BuiltinType::ULongLong:
    287             return T == C.LongLongTy;
    288         }
    289       return false;
    290     }
    291 
    292     case CStrTy: {
    293       const PointerType *PT = argTy->getAs<PointerType>();
    294       if (!PT)
    295         return false;
    296       QualType pointeeTy = PT->getPointeeType();
    297       if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
    298         switch (BT->getKind()) {
    299           case BuiltinType::Void:
    300           case BuiltinType::Char_U:
    301           case BuiltinType::UChar:
    302           case BuiltinType::Char_S:
    303           case BuiltinType::SChar:
    304             return true;
    305           default:
    306             break;
    307         }
    308 
    309       return false;
    310     }
    311 
    312     case WCStrTy: {
    313       const PointerType *PT = argTy->getAs<PointerType>();
    314       if (!PT)
    315         return false;
    316       QualType pointeeTy =
    317         C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
    318       return pointeeTy == C.getWCharType();
    319     }
    320 
    321     case WIntTy: {
    322       // Instead of doing a lookup for the definition of 'wint_t' (which
    323       // is defined by the system headers) instead see if wchar_t and
    324       // the argument type promote to the same type.
    325       QualType PromoWChar =
    326         C.getWCharType()->isPromotableIntegerType()
    327           ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
    328       QualType PromoArg =
    329         argTy->isPromotableIntegerType()
    330           ? C.getPromotedIntegerType(argTy) : argTy;
    331 
    332       PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
    333       PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
    334 
    335       return PromoWChar == PromoArg;
    336     }
    337 
    338     case CPointerTy:
    339       return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
    340              argTy->isBlockPointerType() || argTy->isNullPtrType();
    341 
    342     case ObjCPointerTy: {
    343       if (argTy->getAs<ObjCObjectPointerType>() ||
    344           argTy->getAs<BlockPointerType>())
    345         return true;
    346 
    347       // Handle implicit toll-free bridging.
    348       if (const PointerType *PT = argTy->getAs<PointerType>()) {
    349         // Things such as CFTypeRef are really just opaque pointers
    350         // to C structs representing CF types that can often be bridged
    351         // to Objective-C objects.  Since the compiler doesn't know which
    352         // structs can be toll-free bridged, we just accept them all.
    353         QualType pointee = PT->getPointeeType();
    354         if (pointee->getAsStructureType() || pointee->isVoidType())
    355           return true;
    356       }
    357       return false;
    358     }
    359   }
    360 
    361   llvm_unreachable("Invalid ArgTypeResult Kind!");
    362 }
    363 
    364 QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
    365   switch (K) {
    366     case InvalidTy:
    367       llvm_unreachable("No representative type for Invalid ArgTypeResult");
    368     case UnknownTy:
    369       return QualType();
    370     case AnyCharTy:
    371       return C.CharTy;
    372     case SpecificTy:
    373       return T;
    374     case CStrTy:
    375       return C.getPointerType(C.CharTy);
    376     case WCStrTy:
    377       return C.getPointerType(C.getWCharType());
    378     case ObjCPointerTy:
    379       return C.ObjCBuiltinIdTy;
    380     case CPointerTy:
    381       return C.VoidPtrTy;
    382     case WIntTy: {
    383       QualType WC = C.getWCharType();
    384       return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
    385     }
    386   }
    387 
    388   llvm_unreachable("Invalid ArgTypeResult Kind!");
    389 }
    390 
    391 std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const {
    392   std::string S = getRepresentativeType(C).getAsString();
    393   if (Name && S != Name)
    394     return std::string("'") + Name + "' (aka '" + S + "')";
    395   return std::string("'") + S + "'";
    396 }
    397 
    398 
    399 //===----------------------------------------------------------------------===//
    400 // Methods on OptionalAmount.
    401 //===----------------------------------------------------------------------===//
    402 
/// Type expected of a data argument that supplies this amount: always the
/// context's 'int' type (Ctx.IntTy), implicitly converted to ArgTypeResult.
ArgTypeResult
analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
  return Ctx.IntTy;
}
    407 
    408 //===----------------------------------------------------------------------===//
    409 // Methods on LengthModifier.
    410 //===----------------------------------------------------------------------===//
    411 
    412 const char *
    413 analyze_format_string::LengthModifier::toString() const {
    414   switch (kind) {
    415   case AsChar:
    416     return "hh";
    417   case AsShort:
    418     return "h";
    419   case AsLong: // or AsWideChar
    420     return "l";
    421   case AsLongLong:
    422     return "ll";
    423   case AsQuad:
    424     return "q";
    425   case AsIntMax:
    426     return "j";
    427   case AsSizeT:
    428     return "z";
    429   case AsPtrDiff:
    430     return "t";
    431   case AsLongDouble:
    432     return "L";
    433   case AsAllocate:
    434     return "a";
    435   case AsMAllocate:
    436     return "m";
    437   case None:
    438     return "";
    439   }
    440   return NULL;
    441 }
    442 
    443 //===----------------------------------------------------------------------===//
    444 // Methods on ConversionSpecifier.
    445 //===----------------------------------------------------------------------===//
    446 
    447 const char *ConversionSpecifier::toString() const {
    448   switch (kind) {
    449   case dArg: return "d";
    450   case iArg: return "i";
    451   case oArg: return "o";
    452   case uArg: return "u";
    453   case xArg: return "x";
    454   case XArg: return "X";
    455   case fArg: return "f";
    456   case FArg: return "F";
    457   case eArg: return "e";
    458   case EArg: return "E";
    459   case gArg: return "g";
    460   case GArg: return "G";
    461   case aArg: return "a";
    462   case AArg: return "A";
    463   case cArg: return "c";
    464   case sArg: return "s";
    465   case pArg: return "p";
    466   case nArg: return "n";
    467   case PercentArg:  return "%";
    468   case ScanListArg: return "[";
    469   case InvalidSpecifier: return NULL;
    470 
    471   // MacOS X unicode extensions.
    472   case CArg: return "C";
    473   case SArg: return "S";
    474 
    475   // Objective-C specific specifiers.
    476   case ObjCObjArg: return "@";
    477 
    478   // GlibC specific specifiers.
    479   case PrintErrno: return "m";
    480   }
    481   return NULL;
    482 }
    483 
    484 //===----------------------------------------------------------------------===//
// Methods on OptionalAmount (printing) and FormatSpecifier.
    486 //===----------------------------------------------------------------------===//
    487 
    488 void OptionalAmount::toString(raw_ostream &os) const {
    489   switch (hs) {
    490   case Invalid:
    491   case NotSpecified:
    492     return;
    493   case Arg:
    494     if (UsesDotPrefix)
    495         os << ".";
    496     if (usesPositionalArg())
    497       os << "*" << getPositionalArgIndex() << "$";
    498     else
    499       os << "*";
    500     break;
    501   case Constant:
    502     if (UsesDotPrefix)
    503         os << ".";
    504     os << amt;
    505     break;
    506   }
    507 }
    508 
    509 bool FormatSpecifier::hasValidLengthModifier() const {
    510   switch (LM.getKind()) {
    511     case LengthModifier::None:
    512       return true;
    513 
    514     // Handle most integer flags
    515     case LengthModifier::AsChar:
    516     case LengthModifier::AsShort:
    517     case LengthModifier::AsLongLong:
    518     case LengthModifier::AsQuad:
    519     case LengthModifier::AsIntMax:
    520     case LengthModifier::AsSizeT:
    521     case LengthModifier::AsPtrDiff:
    522       switch (CS.getKind()) {
    523         case ConversionSpecifier::dArg:
    524         case ConversionSpecifier::iArg:
    525         case ConversionSpecifier::oArg:
    526         case ConversionSpecifier::uArg:
    527         case ConversionSpecifier::xArg:
    528         case ConversionSpecifier::XArg:
    529         case ConversionSpecifier::nArg:
    530           return true;
    531         default:
    532           return false;
    533       }
    534 
    535     // Handle 'l' flag
    536     case LengthModifier::AsLong:
    537       switch (CS.getKind()) {
    538         case ConversionSpecifier::dArg:
    539         case ConversionSpecifier::iArg:
    540         case ConversionSpecifier::oArg:
    541         case ConversionSpecifier::uArg:
    542         case ConversionSpecifier::xArg:
    543         case ConversionSpecifier::XArg:
    544         case ConversionSpecifier::aArg:
    545         case ConversionSpecifier::AArg:
    546         case ConversionSpecifier::fArg:
    547         case ConversionSpecifier::FArg:
    548         case ConversionSpecifier::eArg:
    549         case ConversionSpecifier::EArg:
    550         case ConversionSpecifier::gArg:
    551         case ConversionSpecifier::GArg:
    552         case ConversionSpecifier::nArg:
    553         case ConversionSpecifier::cArg:
    554         case ConversionSpecifier::sArg:
    555         case ConversionSpecifier::ScanListArg:
    556           return true;
    557         default:
    558           return false;
    559       }
    560 
    561     case LengthModifier::AsLongDouble:
    562       switch (CS.getKind()) {
    563         case ConversionSpecifier::aArg:
    564         case ConversionSpecifier::AArg:
    565         case ConversionSpecifier::fArg:
    566         case ConversionSpecifier::FArg:
    567         case ConversionSpecifier::eArg:
    568         case ConversionSpecifier::EArg:
    569         case ConversionSpecifier::gArg:
    570         case ConversionSpecifier::GArg:
    571           return true;
    572         // GNU extension.
    573         case ConversionSpecifier::dArg:
    574         case ConversionSpecifier::iArg:
    575         case ConversionSpecifier::oArg:
    576         case ConversionSpecifier::uArg:
    577         case ConversionSpecifier::xArg:
    578         case ConversionSpecifier::XArg:
    579           return true;
    580         default:
    581           return false;
    582       }
    583 
    584     case LengthModifier::AsAllocate:
    585       switch (CS.getKind()) {
    586         case ConversionSpecifier::sArg:
    587         case ConversionSpecifier::SArg:
    588         case ConversionSpecifier::ScanListArg:
    589           return true;
    590         default:
    591           return false;
    592       }
    593 
    594     case LengthModifier::AsMAllocate:
    595       switch (CS.getKind()) {
    596         case ConversionSpecifier::cArg:
    597         case ConversionSpecifier::CArg:
    598         case ConversionSpecifier::sArg:
    599         case ConversionSpecifier::SArg:
    600         case ConversionSpecifier::ScanListArg:
    601           return true;
    602         default:
    603           return false;
    604       }
    605   }
    606   llvm_unreachable("Invalid LengthModifier Kind!");
    607 }
    608 
    609 bool FormatSpecifier::hasStandardLengthModifier() const {
    610   switch (LM.getKind()) {
    611     case LengthModifier::None:
    612     case LengthModifier::AsChar:
    613     case LengthModifier::AsShort:
    614     case LengthModifier::AsLong:
    615     case LengthModifier::AsLongLong:
    616     case LengthModifier::AsIntMax:
    617     case LengthModifier::AsSizeT:
    618     case LengthModifier::AsPtrDiff:
    619     case LengthModifier::AsLongDouble:
    620       return true;
    621     case LengthModifier::AsAllocate:
    622     case LengthModifier::AsMAllocate:
    623     case LengthModifier::AsQuad:
    624       return false;
    625   }
    626   llvm_unreachable("Invalid LengthModifier Kind!");
    627 }
    628 
    629 bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions &LangOpt) const {
    630   switch (CS.getKind()) {
    631     case ConversionSpecifier::cArg:
    632     case ConversionSpecifier::dArg:
    633     case ConversionSpecifier::iArg:
    634     case ConversionSpecifier::oArg:
    635     case ConversionSpecifier::uArg:
    636     case ConversionSpecifier::xArg:
    637     case ConversionSpecifier::XArg:
    638     case ConversionSpecifier::fArg:
    639     case ConversionSpecifier::FArg:
    640     case ConversionSpecifier::eArg:
    641     case ConversionSpecifier::EArg:
    642     case ConversionSpecifier::gArg:
    643     case ConversionSpecifier::GArg:
    644     case ConversionSpecifier::aArg:
    645     case ConversionSpecifier::AArg:
    646     case ConversionSpecifier::sArg:
    647     case ConversionSpecifier::pArg:
    648     case ConversionSpecifier::nArg:
    649     case ConversionSpecifier::ObjCObjArg:
    650     case ConversionSpecifier::ScanListArg:
    651     case ConversionSpecifier::PercentArg:
    652       return true;
    653     case ConversionSpecifier::CArg:
    654     case ConversionSpecifier::SArg:
    655       return LangOpt.ObjC1 || LangOpt.ObjC2;
    656     case ConversionSpecifier::InvalidSpecifier:
    657     case ConversionSpecifier::PrintErrno:
    658       return false;
    659   }
    660   llvm_unreachable("Invalid ConversionSpecifier Kind!");
    661 }
    662 
    663 bool FormatSpecifier::hasStandardLengthConversionCombination() const {
    664   if (LM.getKind() == LengthModifier::AsLongDouble) {
    665     switch(CS.getKind()) {
    666         case ConversionSpecifier::dArg:
    667         case ConversionSpecifier::iArg:
    668         case ConversionSpecifier::oArg:
    669         case ConversionSpecifier::uArg:
    670         case ConversionSpecifier::xArg:
    671         case ConversionSpecifier::XArg:
    672           return false;
    673         default:
    674           return true;
    675     }
    676   }
    677   return true;
    678 }
    679