Home | History | Annotate | Download | only in Analyses
      1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines APIs for analyzing the format strings of printf, fscanf,
     11 // and friends.
     12 //
     13 // The structure of format strings for fprintf is described in C99 7.19.6.1.
     14 //
     15 // The structure of format strings for fscanf is described in C99 7.19.6.2.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
     20 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
     21 
     22 #include "clang/AST/CanonicalType.h"
     23 
     24 namespace clang {
     25 
     26 class TargetInfo;
     27 
     28 //===----------------------------------------------------------------------===//
     29 /// Common components of both fprintf and fscanf format strings.
     30 namespace analyze_format_string {
     31 
     32 /// Class representing optional flags with location and representation
     33 /// information.
     34 class OptionalFlag {
     35 public:
     36   OptionalFlag(const char *Representation)
     37       : representation(Representation), flag(false) {}
     38   bool isSet() { return flag; }
     39   void set() { flag = true; }
     40   void clear() { flag = false; }
     41   void setPosition(const char *position) {
     42     assert(position);
     43     this->position = position;
     44   }
     45   const char *getPosition() const {
     46     assert(position);
     47     return position;
     48   }
     49   const char *toString() const { return representation; }
     50 
     51   // Overloaded operators for bool like qualities
     52   explicit operator bool() const { return flag; }
     53   OptionalFlag& operator=(const bool &rhs) {
     54     flag = rhs;
     55     return *this;  // Return a reference to myself.
     56   }
     57 private:
     58   const char *representation;
     59   const char *position;
     60   bool flag;
     61 };
     62 
     63 /// Represents the length modifier in a format string in scanf/printf.
     64 class LengthModifier {
     65 public:
     66   enum Kind {
     67     None,
     68     AsChar,       // 'hh'
     69     AsShort,      // 'h'
     70     AsLong,       // 'l'
     71     AsLongLong,   // 'll'
     72     AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
     73     AsIntMax,     // 'j'
     74     AsSizeT,      // 'z'
     75     AsPtrDiff,    // 't'
     76     AsInt32,      // 'I32' (MSVCRT, like __int32)
     77     AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
     78     AsInt64,      // 'I64' (MSVCRT, like __int64)
     79     AsLongDouble, // 'L'
     80     AsAllocate,   // for '%as', GNU extension to C90 scanf
     81     AsMAllocate,  // for '%ms', GNU extension to scanf
     82     AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z
     83     AsWideChar = AsLong // for '%ls', only makes sense for printf
     84   };
     85 
     86   LengthModifier()
     87     : Position(nullptr), kind(None) {}
     88   LengthModifier(const char *pos, Kind k)
     89     : Position(pos), kind(k) {}
     90 
     91   const char *getStart() const {
     92     return Position;
     93   }
     94 
     95   unsigned getLength() const {
     96     switch (kind) {
     97       default:
     98         return 1;
     99       case AsLongLong:
    100       case AsChar:
    101         return 2;
    102       case AsInt32:
    103       case AsInt64:
    104         return 3;
    105       case None:
    106         return 0;
    107     }
    108   }
    109 
    110   Kind getKind() const { return kind; }
    111   void setKind(Kind k) { kind = k; }
    112 
    113   const char *toString() const;
    114 
    115 private:
    116   const char *Position;
    117   Kind kind;
    118 };
    119 
    120 class ConversionSpecifier {
    121 public:
    122   enum Kind {
    123     InvalidSpecifier = 0,
    124       // C99 conversion specifiers.
    125     cArg,
    126     dArg,
    127     DArg, // Apple extension
    128     iArg,
    129     IntArgBeg = dArg, IntArgEnd = iArg,
    130 
    131     oArg,
    132     OArg, // Apple extension
    133     uArg,
    134     UArg, // Apple extension
    135     xArg,
    136     XArg,
    137     UIntArgBeg = oArg, UIntArgEnd = XArg,
    138 
    139     fArg,
    140     FArg,
    141     eArg,
    142     EArg,
    143     gArg,
    144     GArg,
    145     aArg,
    146     AArg,
    147     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
    148 
    149     sArg,
    150     pArg,
    151     nArg,
    152     PercentArg,
    153     CArg,
    154     SArg,
    155 
    156     // ** Printf-specific **
    157 
    158     ZArg, // MS extension
    159 
    160     // Objective-C specific specifiers.
    161     ObjCObjArg,  // '@'
    162     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
    163 
    164     // FreeBSD kernel specific specifiers.
    165     FreeBSDbArg,
    166     FreeBSDDArg,
    167     FreeBSDrArg,
    168     FreeBSDyArg,
    169 
    170     // GlibC specific specifiers.
    171     PrintErrno,   // 'm'
    172 
    173     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
    174 
    175     // ** Scanf-specific **
    176     ScanListArg, // '['
    177     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
    178   };
    179 
    180   ConversionSpecifier(bool isPrintf = true)
    181     : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
    182       kind(InvalidSpecifier) {}
    183 
    184   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
    185     : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
    186 
    187   const char *getStart() const {
    188     return Position;
    189   }
    190 
    191   StringRef getCharacters() const {
    192     return StringRef(getStart(), getLength());
    193   }
    194 
    195   bool consumesDataArgument() const {
    196     switch (kind) {
    197       case PrintErrno:
    198         assert(IsPrintf);
    199         return false;
    200       case PercentArg:
    201         return false;
    202       default:
    203         return true;
    204     }
    205   }
    206 
    207   Kind getKind() const { return kind; }
    208   void setKind(Kind k) { kind = k; }
    209   unsigned getLength() const {
    210     return EndScanList ? EndScanList - Position : 1;
    211   }
    212 
    213   bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
    214     kind == FreeBSDrArg || kind == FreeBSDyArg; }
    215   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
    216   bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
    217   const char *toString() const;
    218 
    219   bool isPrintfKind() const { return IsPrintf; }
    220 
    221   Optional<ConversionSpecifier> getStandardSpecifier() const;
    222 
    223 protected:
    224   bool IsPrintf;
    225   const char *Position;
    226   const char *EndScanList;
    227   Kind kind;
    228 };
    229 
    230 class ArgType {
    231 public:
    232   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
    233               AnyCharTy, CStrTy, WCStrTy, WIntTy };
    234 
    235   enum MatchKind { NoMatch = 0, Match = 1, NoMatchPedantic };
    236 
    237 private:
    238   const Kind K;
    239   QualType T;
    240   const char *Name;
    241   bool Ptr;
    242 public:
    243   ArgType(Kind k = UnknownTy, const char *n = nullptr)
    244       : K(k), Name(n), Ptr(false) {}
    245   ArgType(QualType t, const char *n = nullptr)
    246       : K(SpecificTy), T(t), Name(n), Ptr(false) {}
    247   ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
    248 
    249   static ArgType Invalid() { return ArgType(InvalidTy); }
    250   bool isValid() const { return K != InvalidTy; }
    251 
    252   /// Create an ArgType which corresponds to the type pointer to A.
    253   static ArgType PtrTo(const ArgType& A) {
    254     assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
    255     ArgType Res = A;
    256     Res.Ptr = true;
    257     return Res;
    258   }
    259 
    260   MatchKind matchesType(ASTContext &C, QualType argTy) const;
    261 
    262   QualType getRepresentativeType(ASTContext &C) const;
    263 
    264   std::string getRepresentativeTypeName(ASTContext &C) const;
    265 };
    266 
    267 class OptionalAmount {
    268 public:
    269   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
    270 
    271   OptionalAmount(HowSpecified howSpecified,
    272                  unsigned amount,
    273                  const char *amountStart,
    274                  unsigned amountLength,
    275                  bool usesPositionalArg)
    276   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
    277   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
    278 
    279   OptionalAmount(bool valid = true)
    280   : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
    281   UsesPositionalArg(0), UsesDotPrefix(0) {}
    282 
    283   bool isInvalid() const {
    284     return hs == Invalid;
    285   }
    286 
    287   HowSpecified getHowSpecified() const { return hs; }
    288   void setHowSpecified(HowSpecified h) { hs = h; }
    289 
    290   bool hasDataArgument() const { return hs == Arg; }
    291 
    292   unsigned getArgIndex() const {
    293     assert(hasDataArgument());
    294     return amt;
    295   }
    296 
    297   unsigned getConstantAmount() const {
    298     assert(hs == Constant);
    299     return amt;
    300   }
    301 
    302   const char *getStart() const {
    303       // We include the . character if it is given.
    304     return start - UsesDotPrefix;
    305   }
    306 
    307   unsigned getConstantLength() const {
    308     assert(hs == Constant);
    309     return length + UsesDotPrefix;
    310   }
    311 
    312   ArgType getArgType(ASTContext &Ctx) const;
    313 
    314   void toString(raw_ostream &os) const;
    315 
    316   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
    317   unsigned getPositionalArgIndex() const {
    318     assert(hasDataArgument());
    319     return amt + 1;
    320   }
    321 
    322   bool usesDotPrefix() const { return UsesDotPrefix; }
    323   void setUsesDotPrefix() { UsesDotPrefix = true; }
    324 
    325 private:
    326   const char *start;
    327   unsigned length;
    328   HowSpecified hs;
    329   unsigned amt;
    330   bool UsesPositionalArg : 1;
    331   bool UsesDotPrefix;
    332 };
    333 
    334 
    335 class FormatSpecifier {
    336 protected:
    337   LengthModifier LM;
    338   OptionalAmount FieldWidth;
    339   ConversionSpecifier CS;
    340   /// Positional arguments, an IEEE extension:
    341   ///  IEEE Std 1003.1, 2004 Edition
    342   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
    343   bool UsesPositionalArg;
    344   unsigned argIndex;
    345 public:
    346   FormatSpecifier(bool isPrintf)
    347     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
    348 
    349   void setLengthModifier(LengthModifier lm) {
    350     LM = lm;
    351   }
    352 
    353   void setUsesPositionalArg() { UsesPositionalArg = true; }
    354 
    355   void setArgIndex(unsigned i) {
    356     argIndex = i;
    357   }
    358 
    359   unsigned getArgIndex() const {
    360     return argIndex;
    361   }
    362 
    363   unsigned getPositionalArgIndex() const {
    364     return argIndex + 1;
    365   }
    366 
    367   const LengthModifier &getLengthModifier() const {
    368     return LM;
    369   }
    370 
    371   const OptionalAmount &getFieldWidth() const {
    372     return FieldWidth;
    373   }
    374 
    375   void setFieldWidth(const OptionalAmount &Amt) {
    376     FieldWidth = Amt;
    377   }
    378 
    379   bool usesPositionalArg() const { return UsesPositionalArg; }
    380 
    381   bool hasValidLengthModifier(const TargetInfo &Target) const;
    382 
    383   bool hasStandardLengthModifier() const;
    384 
    385   Optional<LengthModifier> getCorrectedLengthModifier() const;
    386 
    387   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
    388 
    389   bool hasStandardLengthConversionCombination() const;
    390 
    391   /// For a TypedefType QT, if it is a named integer type such as size_t,
    392   /// assign the appropriate value to LM and return true.
    393   static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
    394 };
    395 
    396 } // end analyze_format_string namespace
    397 
    398 //===----------------------------------------------------------------------===//
    399 /// Pieces specific to fprintf format strings.
    400 
    401 namespace analyze_printf {
    402 
    403 class PrintfConversionSpecifier :
    404   public analyze_format_string::ConversionSpecifier  {
    405 public:
    406   PrintfConversionSpecifier()
    407     : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
    408 
    409   PrintfConversionSpecifier(const char *pos, Kind k)
    410     : ConversionSpecifier(true, pos, k) {}
    411 
    412   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
    413   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
    414                                     kind <= DoubleArgEnd; }
    415   unsigned getLength() const {
    416       // Conversion specifiers currently only are represented by
    417       // single characters, but we be flexible.
    418     return 1;
    419   }
    420 
    421   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    422     return CS->isPrintfKind();
    423   }
    424 };
    425 
    426 using analyze_format_string::ArgType;
    427 using analyze_format_string::LengthModifier;
    428 using analyze_format_string::OptionalAmount;
    429 using analyze_format_string::OptionalFlag;
    430 
    431 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
    432   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
    433   OptionalFlag IsLeftJustified; // '-'
    434   OptionalFlag HasPlusPrefix; // '+'
    435   OptionalFlag HasSpacePrefix; // ' '
    436   OptionalFlag HasAlternativeForm; // '#'
    437   OptionalFlag HasLeadingZeroes; // '0'
    438   OptionalAmount Precision;
    439 public:
    440   PrintfSpecifier() :
    441     FormatSpecifier(/* isPrintf = */ true),
    442     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
    443     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
    444 
    445   static PrintfSpecifier Parse(const char *beg, const char *end);
    446 
    447     // Methods for incrementally constructing the PrintfSpecifier.
    448   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
    449     CS = cs;
    450   }
    451   void setHasThousandsGrouping(const char *position) {
    452     HasThousandsGrouping = true;
    453     HasThousandsGrouping.setPosition(position);
    454   }
    455   void setIsLeftJustified(const char *position) {
    456     IsLeftJustified = true;
    457     IsLeftJustified.setPosition(position);
    458   }
    459   void setHasPlusPrefix(const char *position) {
    460     HasPlusPrefix = true;
    461     HasPlusPrefix.setPosition(position);
    462   }
    463   void setHasSpacePrefix(const char *position) {
    464     HasSpacePrefix = true;
    465     HasSpacePrefix.setPosition(position);
    466   }
    467   void setHasAlternativeForm(const char *position) {
    468     HasAlternativeForm = true;
    469     HasAlternativeForm.setPosition(position);
    470   }
    471   void setHasLeadingZeros(const char *position) {
    472     HasLeadingZeroes = true;
    473     HasLeadingZeroes.setPosition(position);
    474   }
    475   void setUsesPositionalArg() { UsesPositionalArg = true; }
    476 
    477     // Methods for querying the format specifier.
    478 
    479   const PrintfConversionSpecifier &getConversionSpecifier() const {
    480     return cast<PrintfConversionSpecifier>(CS);
    481   }
    482 
    483   void setPrecision(const OptionalAmount &Amt) {
    484     Precision = Amt;
    485     Precision.setUsesDotPrefix();
    486   }
    487 
    488   const OptionalAmount &getPrecision() const {
    489     return Precision;
    490   }
    491 
    492   bool consumesDataArgument() const {
    493     return getConversionSpecifier().consumesDataArgument();
    494   }
    495 
    496   /// \brief Returns the builtin type that a data argument
    497   /// paired with this format specifier should have.  This method
    498   /// will return null if the format specifier does not have
    499   /// a matching data argument or the matching argument matches
    500   /// more than one type.
    501   ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
    502 
    503   const OptionalFlag &hasThousandsGrouping() const {
    504       return HasThousandsGrouping;
    505   }
    506   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
    507   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
    508   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
    509   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
    510   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
    511   bool usesPositionalArg() const { return UsesPositionalArg; }
    512 
    513   /// Changes the specifier and length according to a QualType, retaining any
    514   /// flags or options. Returns true on success, or false when a conversion
    515   /// was not successful.
    516   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
    517                bool IsObjCLiteral);
    518 
    519   void toString(raw_ostream &os) const;
    520 
    521   // Validation methods - to check if any element results in undefined behavior
    522   bool hasValidPlusPrefix() const;
    523   bool hasValidAlternativeForm() const;
    524   bool hasValidLeadingZeros() const;
    525   bool hasValidSpacePrefix() const;
    526   bool hasValidLeftJustified() const;
    527   bool hasValidThousandsGroupingPrefix() const;
    528 
    529   bool hasValidPrecision() const;
    530   bool hasValidFieldWidth() const;
    531 };
    532 }  // end analyze_printf namespace
    533 
    534 //===----------------------------------------------------------------------===//
    535 /// Pieces specific to fscanf format strings.
    536 
    537 namespace analyze_scanf {
    538 
    539 class ScanfConversionSpecifier :
    540     public analyze_format_string::ConversionSpecifier  {
    541 public:
    542   ScanfConversionSpecifier()
    543     : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
    544 
    545   ScanfConversionSpecifier(const char *pos, Kind k)
    546     : ConversionSpecifier(false, pos, k) {}
    547 
    548   void setEndScanList(const char *pos) { EndScanList = pos; }
    549 
    550   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    551     return !CS->isPrintfKind();
    552   }
    553 };
    554 
    555 using analyze_format_string::ArgType;
    556 using analyze_format_string::LengthModifier;
    557 using analyze_format_string::OptionalAmount;
    558 using analyze_format_string::OptionalFlag;
    559 
    560 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
    561   OptionalFlag SuppressAssignment; // '*'
    562 public:
    563   ScanfSpecifier() :
    564     FormatSpecifier(/* isPrintf = */ false),
    565     SuppressAssignment("*") {}
    566 
    567   void setSuppressAssignment(const char *position) {
    568     SuppressAssignment = true;
    569     SuppressAssignment.setPosition(position);
    570   }
    571 
    572   const OptionalFlag &getSuppressAssignment() const {
    573     return SuppressAssignment;
    574   }
    575 
    576   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
    577     CS = cs;
    578   }
    579 
    580   const ScanfConversionSpecifier &getConversionSpecifier() const {
    581     return cast<ScanfConversionSpecifier>(CS);
    582   }
    583 
    584   bool consumesDataArgument() const {
    585     return CS.consumesDataArgument() && !SuppressAssignment;
    586   }
    587 
    588   ArgType getArgType(ASTContext &Ctx) const;
    589 
    590   bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
    591                ASTContext &Ctx);
    592 
    593   void toString(raw_ostream &os) const;
    594 
    595   static ScanfSpecifier Parse(const char *beg, const char *end);
    596 };
    597 
    598 } // end analyze_scanf namespace
    599 
    600 //===----------------------------------------------------------------------===//
    601 // Parsing and processing of format strings (both fprintf and fscanf).
    602 
    603 namespace analyze_format_string {
    604 
    605 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
    606 
    607 class FormatStringHandler {
    608 public:
    609   FormatStringHandler() {}
    610   virtual ~FormatStringHandler();
    611 
    612   virtual void HandleNullChar(const char *nullCharacter) {}
    613 
    614   virtual void HandlePosition(const char *startPos, unsigned posLen) {}
    615 
    616   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
    617                                      PositionContext p) {}
    618 
    619   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
    620 
    621   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
    622                                          unsigned specifierLen) {}
    623 
    624   // Printf-specific handlers.
    625 
    626   virtual bool HandleInvalidPrintfConversionSpecifier(
    627                                       const analyze_printf::PrintfSpecifier &FS,
    628                                       const char *startSpecifier,
    629                                       unsigned specifierLen) {
    630     return true;
    631   }
    632 
    633   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
    634                                      const char *startSpecifier,
    635                                      unsigned specifierLen) {
    636     return true;
    637   }
    638 
    639     // Scanf-specific handlers.
    640 
    641   virtual bool HandleInvalidScanfConversionSpecifier(
    642                                         const analyze_scanf::ScanfSpecifier &FS,
    643                                         const char *startSpecifier,
    644                                         unsigned specifierLen) {
    645     return true;
    646   }
    647 
    648   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
    649                                     const char *startSpecifier,
    650                                     unsigned specifierLen) {
    651     return true;
    652   }
    653 
    654   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
    655 };
    656 
    657 bool ParsePrintfString(FormatStringHandler &H,
    658                        const char *beg, const char *end, const LangOptions &LO,
    659                        const TargetInfo &Target, bool isFreeBSDKPrintf);
    660 
    661 bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO,
    662                               const TargetInfo &Target);
    663 
    664 bool ParseScanfString(FormatStringHandler &H,
    665                       const char *beg, const char *end, const LangOptions &LO,
    666                       const TargetInfo &Target);
    667 
    668 } // end analyze_format_string namespace
    669 } // end clang namespace
    670 #endif
    671