Home | History | Annotate | Download | only in Analysis
      1 // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Shared details for processing format strings of printf and scanf
     11 // (and friends).
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "FormatStringParsing.h"
     16 
     17 using clang::analyze_format_string::ArgTypeResult;
     18 using clang::analyze_format_string::FormatStringHandler;
     19 using clang::analyze_format_string::FormatSpecifier;
     20 using clang::analyze_format_string::LengthModifier;
     21 using clang::analyze_format_string::OptionalAmount;
     22 using clang::analyze_format_string::PositionContext;
     23 using clang::analyze_format_string::ConversionSpecifier;
     24 using namespace clang;
     25 
     26 // Key function to FormatStringHandler.
     27 FormatStringHandler::~FormatStringHandler() {}
     28 
     29 //===----------------------------------------------------------------------===//
     30 // Functions for parsing format strings components in both printf and
     31 // scanf format strings.
     32 //===----------------------------------------------------------------------===//
     33 
     34 OptionalAmount
     35 clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
     36   const char *I = Beg;
     37   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     38 
     39   unsigned accumulator = 0;
     40   bool hasDigits = false;
     41 
     42   for ( ; I != E; ++I) {
     43     char c = *I;
     44     if (c >= '0' && c <= '9') {
     45       hasDigits = true;
     46       accumulator = (accumulator * 10) + (c - '0');
     47       continue;
     48     }
     49 
     50     if (hasDigits)
     51       return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
     52           false);
     53 
     54     break;
     55   }
     56 
     57   return OptionalAmount();
     58 }
     59 
     60 OptionalAmount
     61 clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
     62                                                      const char *E,
     63                                                      unsigned &argIndex) {
     64   if (*Beg == '*') {
     65     ++Beg;
     66     return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
     67   }
     68 
     69   return ParseAmount(Beg, E);
     70 }
     71 
     72 OptionalAmount
     73 clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
     74                                                   const char *Start,
     75                                                   const char *&Beg,
     76                                                   const char *E,
     77                                                   PositionContext p) {
     78   if (*Beg == '*') {
     79     const char *I = Beg + 1;
     80     const OptionalAmount &Amt = ParseAmount(I, E);
     81 
     82     if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
     83       H.HandleInvalidPosition(Beg, I - Beg, p);
     84       return OptionalAmount(false);
     85     }
     86 
     87     if (I == E) {
     88       // No more characters left?
     89       H.HandleIncompleteSpecifier(Start, E - Start);
     90       return OptionalAmount(false);
     91     }
     92 
     93     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
     94 
     95     if (*I == '$') {
     96       // Handle positional arguments
     97 
     98       // Special case: '*0$', since this is an easy mistake.
     99       if (Amt.getConstantAmount() == 0) {
    100         H.HandleZeroPosition(Beg, I - Beg + 1);
    101         return OptionalAmount(false);
    102       }
    103 
    104       const char *Tmp = Beg;
    105       Beg = ++I;
    106 
    107       return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
    108                             Tmp, 0, true);
    109     }
    110 
    111     H.HandleInvalidPosition(Beg, I - Beg, p);
    112     return OptionalAmount(false);
    113   }
    114 
    115   return ParseAmount(Beg, E);
    116 }
    117 
    118 
    119 bool
    120 clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
    121                                               FormatSpecifier &CS,
    122                                               const char *Start,
    123                                               const char *&Beg, const char *E,
    124                                               unsigned *argIndex) {
    125   // FIXME: Support negative field widths.
    126   if (argIndex) {
    127     CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
    128   }
    129   else {
    130     const OptionalAmount Amt =
    131       ParsePositionAmount(H, Start, Beg, E,
    132                           analyze_format_string::FieldWidthPos);
    133 
    134     if (Amt.isInvalid())
    135       return true;
    136     CS.setFieldWidth(Amt);
    137   }
    138   return false;
    139 }
    140 
    141 bool
    142 clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
    143                                                FormatSpecifier &FS,
    144                                                const char *Start,
    145                                                const char *&Beg,
    146                                                const char *E) {
    147   const char *I = Beg;
    148 
    149   const OptionalAmount &Amt = ParseAmount(I, E);
    150 
    151   if (I == E) {
    152     // No more characters left?
    153     H.HandleIncompleteSpecifier(Start, E - Start);
    154     return true;
    155   }
    156 
    157   if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
    158     // Special case: '%0$', since this is an easy mistake.
    159     if (Amt.getConstantAmount() == 0) {
    160       H.HandleZeroPosition(Start, I - Start);
    161       return true;
    162     }
    163 
    164     FS.setArgIndex(Amt.getConstantAmount() - 1);
    165     FS.setUsesPositionalArg();
    166     // Update the caller's pointer if we decided to consume
    167     // these characters.
    168     Beg = I;
    169     return false;
    170   }
    171 
    172   return false;
    173 }
    174 
    175 bool
    176 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
    177                                                   const char *&I,
    178                                                   const char *E) {
    179   LengthModifier::Kind lmKind = LengthModifier::None;
    180   const char *lmPosition = I;
    181   switch (*I) {
    182     default:
    183       return false;
    184     case 'h':
    185       ++I;
    186       lmKind = (I != E && *I == 'h') ?
    187       ++I, LengthModifier::AsChar : LengthModifier::AsShort;
    188       break;
    189     case 'l':
    190       ++I;
    191       lmKind = (I != E && *I == 'l') ?
    192       ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
    193       break;
    194     case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
    195     case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
    196     case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
    197     case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
    198     case 'q': lmKind = LengthModifier::AsLongLong;   ++I; break;
    199   }
    200   LengthModifier lm(lmPosition, lmKind);
    201   FS.setLengthModifier(lm);
    202   return true;
    203 }
    204 
    205 //===----------------------------------------------------------------------===//
    206 // Methods on ArgTypeResult.
    207 //===----------------------------------------------------------------------===//
    208 
    209 bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
    210   switch (K) {
    211     case InvalidTy:
    212       llvm_unreachable("ArgTypeResult must be valid");
    213 
    214     case UnknownTy:
    215       return true;
    216 
    217     case SpecificTy: {
    218       argTy = C.getCanonicalType(argTy).getUnqualifiedType();
    219       if (T == argTy)
    220         return true;
    221       // Check for "compatible types".
    222       if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
    223         switch (BT->getKind()) {
    224           default:
    225             break;
    226           case BuiltinType::Char_S:
    227           case BuiltinType::SChar:
    228             return T == C.UnsignedCharTy;
    229           case BuiltinType::Char_U:
    230           case BuiltinType::UChar:
    231             return T == C.SignedCharTy;
    232           case BuiltinType::Short:
    233             return T == C.UnsignedShortTy;
    234           case BuiltinType::UShort:
    235             return T == C.ShortTy;
    236           case BuiltinType::Int:
    237             return T == C.UnsignedIntTy;
    238           case BuiltinType::UInt:
    239             return T == C.IntTy;
    240           case BuiltinType::Long:
    241             return T == C.UnsignedLongTy;
    242           case BuiltinType::ULong:
    243             return T == C.LongTy;
    244           case BuiltinType::LongLong:
    245             return T == C.UnsignedLongLongTy;
    246           case BuiltinType::ULongLong:
    247             return T == C.LongLongTy;
    248         }
    249       return false;
    250     }
    251 
    252     case CStrTy: {
    253       const PointerType *PT = argTy->getAs<PointerType>();
    254       if (!PT)
    255         return false;
    256       QualType pointeeTy = PT->getPointeeType();
    257       if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
    258         switch (BT->getKind()) {
    259           case BuiltinType::Void:
    260           case BuiltinType::Char_U:
    261           case BuiltinType::UChar:
    262           case BuiltinType::Char_S:
    263           case BuiltinType::SChar:
    264             return true;
    265           default:
    266             break;
    267         }
    268 
    269       return false;
    270     }
    271 
    272     case WCStrTy: {
    273       const PointerType *PT = argTy->getAs<PointerType>();
    274       if (!PT)
    275         return false;
    276       QualType pointeeTy =
    277         C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
    278       return pointeeTy == C.getWCharType();
    279     }
    280 
    281     case WIntTy: {
    282       // Instead of doing a lookup for the definition of 'wint_t' (which
    283       // is defined by the system headers) instead see if wchar_t and
    284       // the argument type promote to the same type.
    285       QualType PromoWChar =
    286         C.getWCharType()->isPromotableIntegerType()
    287           ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
    288       QualType PromoArg =
    289         argTy->isPromotableIntegerType()
    290           ? C.getPromotedIntegerType(argTy) : argTy;
    291 
    292       PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
    293       PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
    294 
    295       return PromoWChar == PromoArg;
    296     }
    297 
    298     case CPointerTy:
    299       return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
    300         argTy->isNullPtrType();
    301 
    302     case ObjCPointerTy:
    303       return argTy->getAs<ObjCObjectPointerType>() != NULL;
    304   }
    305 
    306   // FIXME: Should be unreachable, but Clang is currently emitting
    307   // a warning.
    308   return false;
    309 }
    310 
    311 QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
    312   switch (K) {
    313     case InvalidTy:
    314       llvm_unreachable("No representative type for Invalid ArgTypeResult");
    315     case UnknownTy:
    316       return QualType();
    317     case SpecificTy:
    318       return T;
    319     case CStrTy:
    320       return C.getPointerType(C.CharTy);
    321     case WCStrTy:
    322       return C.getPointerType(C.getWCharType());
    323     case ObjCPointerTy:
    324       return C.ObjCBuiltinIdTy;
    325     case CPointerTy:
    326       return C.VoidPtrTy;
    327     case WIntTy: {
    328       QualType WC = C.getWCharType();
    329       return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
    330     }
    331   }
    332 
    333   // FIXME: Should be unreachable, but Clang is currently emitting
    334   // a warning.
    335   return QualType();
    336 }
    337 
    338 //===----------------------------------------------------------------------===//
    339 // Methods on OptionalAmount.
    340 //===----------------------------------------------------------------------===//
    341 
    342 ArgTypeResult
    343 analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
    344   return Ctx.IntTy;
    345 }
    346 
    347 //===----------------------------------------------------------------------===//
    348 // Methods on LengthModifier.
    349 //===----------------------------------------------------------------------===//
    350 
    351 const char *
    352 analyze_format_string::LengthModifier::toString() const {
    353   switch (kind) {
    354   case AsChar:
    355     return "hh";
    356   case AsShort:
    357     return "h";
    358   case AsLong: // or AsWideChar
    359     return "l";
    360   case AsLongLong:
    361     return "ll";
    362   case AsIntMax:
    363     return "j";
    364   case AsSizeT:
    365     return "z";
    366   case AsPtrDiff:
    367     return "t";
    368   case AsLongDouble:
    369     return "L";
    370   case None:
    371     return "";
    372   }
    373   return NULL;
    374 }
    375 
    376 //===----------------------------------------------------------------------===//
    377 // Methods on OptionalAmount.
    378 //===----------------------------------------------------------------------===//
    379 
    380 void OptionalAmount::toString(raw_ostream &os) const {
    381   switch (hs) {
    382   case Invalid:
    383   case NotSpecified:
    384     return;
    385   case Arg:
    386     if (UsesDotPrefix)
    387         os << ".";
    388     if (usesPositionalArg())
    389       os << "*" << getPositionalArgIndex() << "$";
    390     else
    391       os << "*";
    392     break;
    393   case Constant:
    394     if (UsesDotPrefix)
    395         os << ".";
    396     os << amt;
    397     break;
    398   }
    399 }
    400 
    401 //===----------------------------------------------------------------------===//
// Methods on FormatSpecifier.
    403 //===----------------------------------------------------------------------===//
    404 
    405 bool FormatSpecifier::hasValidLengthModifier() const {
    406   switch (LM.getKind()) {
    407     case LengthModifier::None:
    408       return true;
    409 
    410         // Handle most integer flags
    411     case LengthModifier::AsChar:
    412     case LengthModifier::AsShort:
    413     case LengthModifier::AsLongLong:
    414     case LengthModifier::AsIntMax:
    415     case LengthModifier::AsSizeT:
    416     case LengthModifier::AsPtrDiff:
    417       switch (CS.getKind()) {
    418         case ConversionSpecifier::dArg:
    419         case ConversionSpecifier::iArg:
    420         case ConversionSpecifier::oArg:
    421         case ConversionSpecifier::uArg:
    422         case ConversionSpecifier::xArg:
    423         case ConversionSpecifier::XArg:
    424         case ConversionSpecifier::nArg:
    425           return true;
    426         default:
    427           return false;
    428       }
    429 
    430         // Handle 'l' flag
    431     case LengthModifier::AsLong:
    432       switch (CS.getKind()) {
    433         case ConversionSpecifier::dArg:
    434         case ConversionSpecifier::iArg:
    435         case ConversionSpecifier::oArg:
    436         case ConversionSpecifier::uArg:
    437         case ConversionSpecifier::xArg:
    438         case ConversionSpecifier::XArg:
    439         case ConversionSpecifier::aArg:
    440         case ConversionSpecifier::AArg:
    441         case ConversionSpecifier::fArg:
    442         case ConversionSpecifier::FArg:
    443         case ConversionSpecifier::eArg:
    444         case ConversionSpecifier::EArg:
    445         case ConversionSpecifier::gArg:
    446         case ConversionSpecifier::GArg:
    447         case ConversionSpecifier::nArg:
    448         case ConversionSpecifier::cArg:
    449         case ConversionSpecifier::sArg:
    450           return true;
    451         default:
    452           return false;
    453       }
    454 
    455     case LengthModifier::AsLongDouble:
    456       switch (CS.getKind()) {
    457         case ConversionSpecifier::aArg:
    458         case ConversionSpecifier::AArg:
    459         case ConversionSpecifier::fArg:
    460         case ConversionSpecifier::FArg:
    461         case ConversionSpecifier::eArg:
    462         case ConversionSpecifier::EArg:
    463         case ConversionSpecifier::gArg:
    464         case ConversionSpecifier::GArg:
    465           return true;
    466         default:
    467           return false;
    468       }
    469   }
    470   return false;
    471 }
    472 
    473 
    474