Home | History | Annotate | Download | only in Analyses
      1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines APIs for analyzing the format strings of printf, fscanf,
     11 // and friends.
     12 //
     13 // The structure of format strings for fprintf are described in C99 7.19.6.1.
     14 //
     15 // The structure of format strings for fscanf are described in C99 7.19.6.2.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #ifndef LLVM_CLANG_FORMAT_H
     20 #define LLVM_CLANG_FORMAT_H
     21 
     22 #include "clang/AST/CanonicalType.h"
     23 
     24 namespace clang {
     25 
     26 //===----------------------------------------------------------------------===//
     27 /// Common components of both fprintf and fscanf format strings.
     28 namespace analyze_format_string {
     29 
     /// Class representing an optional flag of a format specifier, together with
     /// its location in the format string and its textual representation.
     ///
     /// The object stores the pointers it is given; it does not copy or own the
     /// characters they refer to.
     class OptionalFlag {
     public:
       /// Creates a cleared flag with no recorded position.
       ///
       /// \param Representation the spelling handed back by toString().
       OptionalFlag(const char *Representation)
           : representation(Representation), position(0), flag(false) {}

       /// Returns true if the flag is currently set.
       bool isSet() const { return flag; }
       /// Marks the flag as set.
       void set() { flag = true; }
       /// Marks the flag as not set.
       void clear() { flag = false; }

       /// Records where the flag was found. \p position must be non-null.
       void setPosition(const char *position) {
         assert(position);
         this->position = position;
       }

       /// Returns the recorded position.
       ///
       /// Asserts that a position has been recorded. The constructor initializes
       /// the position to null, so calling this before setPosition() trips the
       /// assertion deterministically instead of reading an indeterminate pointer.
       const char *getPosition() const {
         assert(position);
         return position;
       }

       /// Returns the representation string passed to the constructor.
       const char *toString() const { return representation; }

       // Overloaded operators for bool like qualities
       operator bool() const { return flag; }
       OptionalFlag& operator=(const bool &rhs) {
         flag = rhs;
         return *this;  // Return a reference to myself.
       }
     private:
       const char *representation;
       const char *position;
       bool flag;
     };
     60 
     /// Length modifier of a conversion in a scanf/printf format string: which
     /// kind it is and where it starts.
     class LengthModifier {
     public:
       /// The recognized modifiers; the trailing comment gives each spelling.
       enum Kind {
         None,
         AsChar,       // 'hh'
         AsShort,      // 'h'
         AsLong,       // 'l'
         AsLongLong,   // 'll'
         AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
         AsIntMax,     // 'j'
         AsSizeT,      // 'z'
         AsPtrDiff,    // 't'
         AsLongDouble, // 'L'
         AsAllocate,   // for '%as', GNU extension to C90 scanf
         AsMAllocate,  // for '%ms', GNU extension to scanf
         AsWideChar = AsLong // for '%ls', only makes sense for printf
       };

       /// Creates an absent modifier (kind None, null start).
       LengthModifier() : Position(0), kind(None) {}

       /// Creates a modifier of kind \p k starting at \p pos.
       LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

       /// Returns the start pointer given at construction.
       const char *getStart() const { return Position; }

       /// Returns the number of characters in the modifier's spelling:
       /// 0 for None, 2 for 'hh' and 'll', 1 for every other kind.
       unsigned getLength() const {
         if (kind == None)
           return 0;
         if (kind == AsChar || kind == AsLongLong)
           return 2;
         return 1;
       }

       Kind getKind() const { return kind; }
       void setKind(Kind k) { kind = k; }

       /// Declared here; defined out of line.
       const char *toString() const;

     private:
       const char *Position;
       Kind kind;
     };
    110 
    111 class ConversionSpecifier {
    112 public:
    113   enum Kind {
    114     InvalidSpecifier = 0,
    115       // C99 conversion specifiers.
    116     cArg,
    117     dArg,
    118     iArg,
    119     IntArgBeg = cArg, IntArgEnd = iArg,
    120 
    121     oArg,
    122     uArg,
    123     xArg,
    124     XArg,
    125     UIntArgBeg = oArg, UIntArgEnd = XArg,
    126 
    127     fArg,
    128     FArg,
    129     eArg,
    130     EArg,
    131     gArg,
    132     GArg,
    133     aArg,
    134     AArg,
    135     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
    136 
    137     sArg,
    138     pArg,
    139     nArg,
    140     PercentArg,
    141     CArg,
    142     SArg,
    143 
    144     // ** Printf-specific **
    145 
    146     // Objective-C specific specifiers.
    147     ObjCObjArg,  // '@'
    148     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
    149 
    150     // GlibC specific specifiers.
    151     PrintErrno,   // 'm'
    152 
    153     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
    154 
    155     // ** Scanf-specific **
    156     ScanListArg, // '['
    157     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
    158   };
    159 
    160   ConversionSpecifier(bool isPrintf)
    161     : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
    162 
    163   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
    164     : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
    165 
    166   const char *getStart() const {
    167     return Position;
    168   }
    169 
    170   StringRef getCharacters() const {
    171     return StringRef(getStart(), getLength());
    172   }
    173 
    174   bool consumesDataArgument() const {
    175     switch (kind) {
    176       case PrintErrno:
    177         assert(IsPrintf);
    178       case PercentArg:
    179         return false;
    180       default:
    181         return true;
    182     }
    183   }
    184 
    185   Kind getKind() const { return kind; }
    186   void setKind(Kind k) { kind = k; }
    187   unsigned getLength() const {
    188     return EndScanList ? EndScanList - Position : 1;
    189   }
    190 
    191   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
    192   const char *toString() const;
    193 
    194   bool isPrintfKind() const { return IsPrintf; }
    195 
    196 protected:
    197   bool IsPrintf;
    198   const char *Position;
    199   const char *EndScanList;
    200   Kind kind;
    201 };
    202 
    203 class ArgTypeResult {
    204 public:
    205   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
    206               AnyCharTy, CStrTy, WCStrTy, WIntTy };
    207 private:
    208   const Kind K;
    209   QualType T;
    210   const char *Name;
    211   ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
    212 public:
    213   ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
    214   ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
    215   ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
    216   ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n)  {}
    217   ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
    218 
    219   static ArgTypeResult Invalid() { return ArgTypeResult(true); }
    220 
    221   bool isValid() const { return K != InvalidTy; }
    222 
    223   const QualType *getSpecificType() const {
    224     return K == SpecificTy ? &T : 0;
    225   }
    226 
    227   bool matchesType(ASTContext &C, QualType argTy) const;
    228 
    229   bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
    230 
    231   QualType getRepresentativeType(ASTContext &C) const;
    232 
    233   std::string getRepresentativeTypeName(ASTContext &C) const;
    234 };
    235 
    236 class OptionalAmount {
    237 public:
    238   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
    239 
    240   OptionalAmount(HowSpecified howSpecified,
    241                  unsigned amount,
    242                  const char *amountStart,
    243                  unsigned amountLength,
    244                  bool usesPositionalArg)
    245   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
    246   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
    247 
    248   OptionalAmount(bool valid = true)
    249   : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
    250   UsesPositionalArg(0), UsesDotPrefix(0) {}
    251 
    252   bool isInvalid() const {
    253     return hs == Invalid;
    254   }
    255 
    256   HowSpecified getHowSpecified() const { return hs; }
    257   void setHowSpecified(HowSpecified h) { hs = h; }
    258 
    259   bool hasDataArgument() const { return hs == Arg; }
    260 
    261   unsigned getArgIndex() const {
    262     assert(hasDataArgument());
    263     return amt;
    264   }
    265 
    266   unsigned getConstantAmount() const {
    267     assert(hs == Constant);
    268     return amt;
    269   }
    270 
    271   const char *getStart() const {
    272       // We include the . character if it is given.
    273     return start - UsesDotPrefix;
    274   }
    275 
    276   unsigned getConstantLength() const {
    277     assert(hs == Constant);
    278     return length + UsesDotPrefix;
    279   }
    280 
    281   ArgTypeResult getArgType(ASTContext &Ctx) const;
    282 
    283   void toString(raw_ostream &os) const;
    284 
    285   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
    286   unsigned getPositionalArgIndex() const {
    287     assert(hasDataArgument());
    288     return amt + 1;
    289   }
    290 
    291   bool usesDotPrefix() const { return UsesDotPrefix; }
    292   void setUsesDotPrefix() { UsesDotPrefix = true; }
    293 
    294 private:
    295   const char *start;
    296   unsigned length;
    297   HowSpecified hs;
    298   unsigned amt;
    299   bool UsesPositionalArg : 1;
    300   bool UsesDotPrefix;
    301 };
    302 
    303 
    304 class FormatSpecifier {
    305 protected:
    306   LengthModifier LM;
    307   OptionalAmount FieldWidth;
    308   ConversionSpecifier CS;
    309   /// Positional arguments, an IEEE extension:
    310   ///  IEEE Std 1003.1, 2004 Edition
    311   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
    312   bool UsesPositionalArg;
    313   unsigned argIndex;
    314 public:
    315   FormatSpecifier(bool isPrintf)
    316     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
    317 
    318   void setLengthModifier(LengthModifier lm) {
    319     LM = lm;
    320   }
    321 
    322   void setUsesPositionalArg() { UsesPositionalArg = true; }
    323 
    324   void setArgIndex(unsigned i) {
    325     argIndex = i;
    326   }
    327 
    328   unsigned getArgIndex() const {
    329     return argIndex;
    330   }
    331 
    332   unsigned getPositionalArgIndex() const {
    333     return argIndex + 1;
    334   }
    335 
    336   const LengthModifier &getLengthModifier() const {
    337     return LM;
    338   }
    339 
    340   const OptionalAmount &getFieldWidth() const {
    341     return FieldWidth;
    342   }
    343 
    344   void setFieldWidth(const OptionalAmount &Amt) {
    345     FieldWidth = Amt;
    346   }
    347 
    348   bool usesPositionalArg() const { return UsesPositionalArg; }
    349 
    350   bool hasValidLengthModifier() const;
    351 
    352   bool hasStandardLengthModifier() const;
    353 
    354   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
    355 
    356   bool hasStandardLengthConversionCombination() const;
    357 };
    358 
    359 } // end analyze_format_string namespace
    360 
    361 //===----------------------------------------------------------------------===//
    362 /// Pieces specific to fprintf format strings.
    363 
    364 namespace analyze_printf {
    365 
    366 class PrintfConversionSpecifier :
    367   public analyze_format_string::ConversionSpecifier  {
    368 public:
    369   PrintfConversionSpecifier()
    370     : ConversionSpecifier(true, 0, InvalidSpecifier) {}
    371 
    372   PrintfConversionSpecifier(const char *pos, Kind k)
    373     : ConversionSpecifier(true, pos, k) {}
    374 
    375   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
    376   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
    377   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
    378                                     kind <= DoubleArgEnd; }
    379   unsigned getLength() const {
    380       // Conversion specifiers currently only are represented by
    381       // single characters, but we be flexible.
    382     return 1;
    383   }
    384 
    385   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    386     return CS->isPrintfKind();
    387   }
    388 };
    389 
    390 using analyze_format_string::ArgTypeResult;
    391 using analyze_format_string::LengthModifier;
    392 using analyze_format_string::OptionalAmount;
    393 using analyze_format_string::OptionalFlag;
    394 
    395 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
    396   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
    397   OptionalFlag IsLeftJustified; // '-'
    398   OptionalFlag HasPlusPrefix; // '+'
    399   OptionalFlag HasSpacePrefix; // ' '
    400   OptionalFlag HasAlternativeForm; // '#'
    401   OptionalFlag HasLeadingZeroes; // '0'
    402   OptionalAmount Precision;
    403 public:
    404   PrintfSpecifier() :
    405     FormatSpecifier(/* isPrintf = */ true),
    406     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
    407     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
    408 
    409   static PrintfSpecifier Parse(const char *beg, const char *end);
    410 
    411     // Methods for incrementally constructing the PrintfSpecifier.
    412   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
    413     CS = cs;
    414   }
    415   void setHasThousandsGrouping(const char *position) {
    416     HasThousandsGrouping = true;
    417     HasThousandsGrouping.setPosition(position);
    418   }
    419   void setIsLeftJustified(const char *position) {
    420     IsLeftJustified = true;
    421     IsLeftJustified.setPosition(position);
    422   }
    423   void setHasPlusPrefix(const char *position) {
    424     HasPlusPrefix = true;
    425     HasPlusPrefix.setPosition(position);
    426   }
    427   void setHasSpacePrefix(const char *position) {
    428     HasSpacePrefix = true;
    429     HasSpacePrefix.setPosition(position);
    430   }
    431   void setHasAlternativeForm(const char *position) {
    432     HasAlternativeForm = true;
    433     HasAlternativeForm.setPosition(position);
    434   }
    435   void setHasLeadingZeros(const char *position) {
    436     HasLeadingZeroes = true;
    437     HasLeadingZeroes.setPosition(position);
    438   }
    439   void setUsesPositionalArg() { UsesPositionalArg = true; }
    440 
    441     // Methods for querying the format specifier.
    442 
    443   const PrintfConversionSpecifier &getConversionSpecifier() const {
    444     return cast<PrintfConversionSpecifier>(CS);
    445   }
    446 
    447   void setPrecision(const OptionalAmount &Amt) {
    448     Precision = Amt;
    449     Precision.setUsesDotPrefix();
    450   }
    451 
    452   const OptionalAmount &getPrecision() const {
    453     return Precision;
    454   }
    455 
    456   bool consumesDataArgument() const {
    457     return getConversionSpecifier().consumesDataArgument();
    458   }
    459 
    460   /// \brief Returns the builtin type that a data argument
    461   /// paired with this format specifier should have.  This method
    462   /// will return null if the format specifier does not have
    463   /// a matching data argument or the matching argument matches
    464   /// more than one type.
    465   ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
    466 
    467   const OptionalFlag &hasThousandsGrouping() const {
    468       return HasThousandsGrouping;
    469   }
    470   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
    471   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
    472   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
    473   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
    474   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
    475   bool usesPositionalArg() const { return UsesPositionalArg; }
    476 
    477   /// Changes the specifier and length according to a QualType, retaining any
    478   /// flags or options. Returns true on success, or false when a conversion
    479   /// was not successful.
    480   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
    481                bool IsObjCLiteral);
    482 
    483   void toString(raw_ostream &os) const;
    484 
    485   // Validation methods - to check if any element results in undefined behavior
    486   bool hasValidPlusPrefix() const;
    487   bool hasValidAlternativeForm() const;
    488   bool hasValidLeadingZeros() const;
    489   bool hasValidSpacePrefix() const;
    490   bool hasValidLeftJustified() const;
    491   bool hasValidThousandsGroupingPrefix() const;
    492 
    493   bool hasValidPrecision() const;
    494   bool hasValidFieldWidth() const;
    495 };
    496 }  // end analyze_printf namespace
    497 
    498 //===----------------------------------------------------------------------===//
    499 /// Pieces specific to fscanf format strings.
    500 
    501 namespace analyze_scanf {
    502 
    503 class ScanfConversionSpecifier :
    504     public analyze_format_string::ConversionSpecifier  {
    505 public:
    506   ScanfConversionSpecifier()
    507     : ConversionSpecifier(false, 0, InvalidSpecifier) {}
    508 
    509   ScanfConversionSpecifier(const char *pos, Kind k)
    510     : ConversionSpecifier(false, pos, k) {}
    511 
    512   void setEndScanList(const char *pos) { EndScanList = pos; }
    513 
    514   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    515     return !CS->isPrintfKind();
    516   }
    517 };
    518 
    519 using analyze_format_string::ArgTypeResult;
    520 using analyze_format_string::LengthModifier;
    521 using analyze_format_string::OptionalAmount;
    522 using analyze_format_string::OptionalFlag;
    523 
    524 class ScanfArgTypeResult : public ArgTypeResult {
    525 public:
    526   enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
    527 private:
    528   Kind K;
    529   ArgTypeResult A;
    530   const char *Name;
    531   QualType getRepresentativeType(ASTContext &C) const;
    532 public:
    533   ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
    534   ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
    535       : K(PtrToArgTypeResultTy), A(a), Name(n) {
    536     assert(A.isValid());
    537   }
    538 
    539   static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
    540 
    541   bool isValid() const { return K != InvalidTy; }
    542 
    543   bool matchesType(ASTContext& C, QualType argTy) const;
    544 
    545   std::string getRepresentativeTypeName(ASTContext& C) const;
    546 };
    547 
    548 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
    549   OptionalFlag SuppressAssignment; // '*'
    550 public:
    551   ScanfSpecifier() :
    552     FormatSpecifier(/* isPrintf = */ false),
    553     SuppressAssignment("*") {}
    554 
    555   void setSuppressAssignment(const char *position) {
    556     SuppressAssignment = true;
    557     SuppressAssignment.setPosition(position);
    558   }
    559 
    560   const OptionalFlag &getSuppressAssignment() const {
    561     return SuppressAssignment;
    562   }
    563 
    564   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
    565     CS = cs;
    566   }
    567 
    568   const ScanfConversionSpecifier &getConversionSpecifier() const {
    569     return cast<ScanfConversionSpecifier>(CS);
    570   }
    571 
    572   bool consumesDataArgument() const {
    573     return CS.consumesDataArgument() && !SuppressAssignment;
    574   }
    575 
    576   ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
    577 
    578   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
    579 
    580   void toString(raw_ostream &os) const;
    581 
    582   static ScanfSpecifier Parse(const char *beg, const char *end);
    583 };
    584 
    585 } // end analyze_scanf namespace
    586 
    587 //===----------------------------------------------------------------------===//
    588 // Parsing and processing of format strings (both fprintf and fscanf).
    589 
    590 namespace analyze_format_string {
    591 
    /// Context tag passed to FormatStringHandler::HandleInvalidPosition.
    /// NOTE(review): the names suggest it says whether the position belonged to
    /// a field width or a precision; confirm against the parser.
    enum PositionContext {
      FieldWidthPos = 0,
      PrecisionPos = 1
    };
    593 
    594 class FormatStringHandler {
    595 public:
    596   FormatStringHandler() {}
    597   virtual ~FormatStringHandler();
    598 
    599   virtual void HandleNullChar(const char *nullCharacter) {}
    600 
    601   virtual void HandlePosition(const char *startPos, unsigned posLen) {}
    602 
    603   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
    604                                      PositionContext p) {}
    605 
    606   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
    607 
    608   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
    609                                          unsigned specifierLen) {}
    610 
    611   // Printf-specific handlers.
    612 
    613   virtual bool HandleInvalidPrintfConversionSpecifier(
    614                                       const analyze_printf::PrintfSpecifier &FS,
    615                                       const char *startSpecifier,
    616                                       unsigned specifierLen) {
    617     return true;
    618   }
    619 
    620   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
    621                                      const char *startSpecifier,
    622                                      unsigned specifierLen) {
    623     return true;
    624   }
    625 
    626     // Scanf-specific handlers.
    627 
    628   virtual bool HandleInvalidScanfConversionSpecifier(
    629                                         const analyze_scanf::ScanfSpecifier &FS,
    630                                         const char *startSpecifier,
    631                                         unsigned specifierLen) {
    632     return true;
    633   }
    634 
    635   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
    636                                     const char *startSpecifier,
    637                                     unsigned specifierLen) {
    638     return true;
    639   }
    640 
    641   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
    642 };
    643 
    644 bool ParsePrintfString(FormatStringHandler &H,
    645                        const char *beg, const char *end, const LangOptions &LO);
    646 
    647 bool ParseScanfString(FormatStringHandler &H,
    648                       const char *beg, const char *end, const LangOptions &LO);
    649 
    650 } // end analyze_format_string namespace
    651 } // end clang namespace
    652 #endif
    653