Home | History | Annotate | Download | only in Analyses
      1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines APIs for analyzing the format strings of printf, fscanf,
     11 // and friends.
     12 //
// The structure of format strings for fprintf is described in C99 7.19.6.1.
     14 //
// The structure of format strings for fscanf is described in C99 7.19.6.2.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #ifndef LLVM_CLANG_FORMAT_H
     20 #define LLVM_CLANG_FORMAT_H
     21 
     22 #include "clang/AST/CanonicalType.h"
     23 
     24 namespace clang {
     25 
     26 class TargetInfo;
     27 
     28 //===----------------------------------------------------------------------===//
     29 /// Common components of both fprintf and fscanf format strings.
     30 namespace analyze_format_string {
     31 
     32 /// Class representing optional flags with location and representation
     33 /// information.
     34 class OptionalFlag {
     35 public:
     36   OptionalFlag(const char *Representation)
     37       : representation(Representation), flag(false) {}
     38   bool isSet() { return flag; }
     39   void set() { flag = true; }
     40   void clear() { flag = false; }
     41   void setPosition(const char *position) {
     42     assert(position);
     43     this->position = position;
     44   }
     45   const char *getPosition() const {
     46     assert(position);
     47     return position;
     48   }
     49   const char *toString() const { return representation; }
     50 
     51   // Overloaded operators for bool like qualities
     52   operator bool() const { return flag; }
     53   OptionalFlag& operator=(const bool &rhs) {
     54     flag = rhs;
     55     return *this;  // Return a reference to myself.
     56   }
     57 private:
     58   const char *representation;
     59   const char *position;
     60   bool flag;
     61 };
     62 
     63 /// Represents the length modifier in a format string in scanf/printf.
     64 class LengthModifier {
     65 public:
     66   enum Kind {
     67     None,
     68     AsChar,       // 'hh'
     69     AsShort,      // 'h'
     70     AsLong,       // 'l'
     71     AsLongLong,   // 'll'
     72     AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
     73     AsIntMax,     // 'j'
     74     AsSizeT,      // 'z'
     75     AsPtrDiff,    // 't'
     76     AsLongDouble, // 'L'
     77     AsAllocate,   // for '%as', GNU extension to C90 scanf
     78     AsMAllocate,  // for '%ms', GNU extension to scanf
     79     AsWideChar = AsLong // for '%ls', only makes sense for printf
     80   };
     81 
     82   LengthModifier()
     83     : Position(0), kind(None) {}
     84   LengthModifier(const char *pos, Kind k)
     85     : Position(pos), kind(k) {}
     86 
     87   const char *getStart() const {
     88     return Position;
     89   }
     90 
     91   unsigned getLength() const {
     92     switch (kind) {
     93       default:
     94         return 1;
     95       case AsLongLong:
     96       case AsChar:
     97         return 2;
     98       case None:
     99         return 0;
    100     }
    101   }
    102 
    103   Kind getKind() const { return kind; }
    104   void setKind(Kind k) { kind = k; }
    105 
    106   const char *toString() const;
    107 
    108 private:
    109   const char *Position;
    110   Kind kind;
    111 };
    112 
    113 class ConversionSpecifier {
    114 public:
    115   enum Kind {
    116     InvalidSpecifier = 0,
    117       // C99 conversion specifiers.
    118     cArg,
    119     dArg,
    120     DArg, // Apple extension
    121     iArg,
    122     IntArgBeg = dArg, IntArgEnd = iArg,
    123 
    124     oArg,
    125     OArg, // Apple extension
    126     uArg,
    127     UArg, // Apple extension
    128     xArg,
    129     XArg,
    130     UIntArgBeg = oArg, UIntArgEnd = XArg,
    131 
    132     fArg,
    133     FArg,
    134     eArg,
    135     EArg,
    136     gArg,
    137     GArg,
    138     aArg,
    139     AArg,
    140     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
    141 
    142     sArg,
    143     pArg,
    144     nArg,
    145     PercentArg,
    146     CArg,
    147     SArg,
    148 
    149     // ** Printf-specific **
    150 
    151     // Objective-C specific specifiers.
    152     ObjCObjArg,  // '@'
    153     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
    154 
    155     // GlibC specific specifiers.
    156     PrintErrno,   // 'm'
    157 
    158     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
    159 
    160     // ** Scanf-specific **
    161     ScanListArg, // '['
    162     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
    163   };
    164 
    165   ConversionSpecifier(bool isPrintf = true)
    166     : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
    167 
    168   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
    169     : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
    170 
    171   const char *getStart() const {
    172     return Position;
    173   }
    174 
    175   StringRef getCharacters() const {
    176     return StringRef(getStart(), getLength());
    177   }
    178 
    179   bool consumesDataArgument() const {
    180     switch (kind) {
    181       case PrintErrno:
    182         assert(IsPrintf);
    183         return false;
    184       case PercentArg:
    185         return false;
    186       default:
    187         return true;
    188     }
    189   }
    190 
    191   Kind getKind() const { return kind; }
    192   void setKind(Kind k) { kind = k; }
    193   unsigned getLength() const {
    194     return EndScanList ? EndScanList - Position : 1;
    195   }
    196 
    197   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
    198   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
    199   bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
    200   const char *toString() const;
    201 
    202   bool isPrintfKind() const { return IsPrintf; }
    203 
    204   Optional<ConversionSpecifier> getStandardSpecifier() const;
    205 
    206 protected:
    207   bool IsPrintf;
    208   const char *Position;
    209   const char *EndScanList;
    210   Kind kind;
    211 };
    212 
    213 class ArgType {
    214 public:
    215   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
    216               AnyCharTy, CStrTy, WCStrTy, WIntTy };
    217 private:
    218   const Kind K;
    219   QualType T;
    220   const char *Name;
    221   bool Ptr;
    222 public:
    223   ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
    224   ArgType(QualType t, const char *n = 0)
    225       : K(SpecificTy), T(t), Name(n), Ptr(false) {}
    226   ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
    227 
    228   static ArgType Invalid() { return ArgType(InvalidTy); }
    229   bool isValid() const { return K != InvalidTy; }
    230 
    231   /// Create an ArgType which corresponds to the type pointer to A.
    232   static ArgType PtrTo(const ArgType& A) {
    233     assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
    234     ArgType Res = A;
    235     Res.Ptr = true;
    236     return Res;
    237   }
    238 
    239   bool matchesType(ASTContext &C, QualType argTy) const;
    240 
    241   QualType getRepresentativeType(ASTContext &C) const;
    242 
    243   std::string getRepresentativeTypeName(ASTContext &C) const;
    244 };
    245 
    246 class OptionalAmount {
    247 public:
    248   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
    249 
    250   OptionalAmount(HowSpecified howSpecified,
    251                  unsigned amount,
    252                  const char *amountStart,
    253                  unsigned amountLength,
    254                  bool usesPositionalArg)
    255   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
    256   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
    257 
    258   OptionalAmount(bool valid = true)
    259   : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
    260   UsesPositionalArg(0), UsesDotPrefix(0) {}
    261 
    262   bool isInvalid() const {
    263     return hs == Invalid;
    264   }
    265 
    266   HowSpecified getHowSpecified() const { return hs; }
    267   void setHowSpecified(HowSpecified h) { hs = h; }
    268 
    269   bool hasDataArgument() const { return hs == Arg; }
    270 
    271   unsigned getArgIndex() const {
    272     assert(hasDataArgument());
    273     return amt;
    274   }
    275 
    276   unsigned getConstantAmount() const {
    277     assert(hs == Constant);
    278     return amt;
    279   }
    280 
    281   const char *getStart() const {
    282       // We include the . character if it is given.
    283     return start - UsesDotPrefix;
    284   }
    285 
    286   unsigned getConstantLength() const {
    287     assert(hs == Constant);
    288     return length + UsesDotPrefix;
    289   }
    290 
    291   ArgType getArgType(ASTContext &Ctx) const;
    292 
    293   void toString(raw_ostream &os) const;
    294 
    295   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
    296   unsigned getPositionalArgIndex() const {
    297     assert(hasDataArgument());
    298     return amt + 1;
    299   }
    300 
    301   bool usesDotPrefix() const { return UsesDotPrefix; }
    302   void setUsesDotPrefix() { UsesDotPrefix = true; }
    303 
    304 private:
    305   const char *start;
    306   unsigned length;
    307   HowSpecified hs;
    308   unsigned amt;
    309   bool UsesPositionalArg : 1;
    310   bool UsesDotPrefix;
    311 };
    312 
    313 
    314 class FormatSpecifier {
    315 protected:
    316   LengthModifier LM;
    317   OptionalAmount FieldWidth;
    318   ConversionSpecifier CS;
    319   /// Positional arguments, an IEEE extension:
    320   ///  IEEE Std 1003.1, 2004 Edition
    321   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
    322   bool UsesPositionalArg;
    323   unsigned argIndex;
    324 public:
    325   FormatSpecifier(bool isPrintf)
    326     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
    327 
    328   void setLengthModifier(LengthModifier lm) {
    329     LM = lm;
    330   }
    331 
    332   void setUsesPositionalArg() { UsesPositionalArg = true; }
    333 
    334   void setArgIndex(unsigned i) {
    335     argIndex = i;
    336   }
    337 
    338   unsigned getArgIndex() const {
    339     return argIndex;
    340   }
    341 
    342   unsigned getPositionalArgIndex() const {
    343     return argIndex + 1;
    344   }
    345 
    346   const LengthModifier &getLengthModifier() const {
    347     return LM;
    348   }
    349 
    350   const OptionalAmount &getFieldWidth() const {
    351     return FieldWidth;
    352   }
    353 
    354   void setFieldWidth(const OptionalAmount &Amt) {
    355     FieldWidth = Amt;
    356   }
    357 
    358   bool usesPositionalArg() const { return UsesPositionalArg; }
    359 
    360   bool hasValidLengthModifier(const TargetInfo &Target) const;
    361 
    362   bool hasStandardLengthModifier() const;
    363 
    364   Optional<LengthModifier> getCorrectedLengthModifier() const;
    365 
    366   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
    367 
    368   bool hasStandardLengthConversionCombination() const;
    369 
    370   /// For a TypedefType QT, if it is a named integer type such as size_t,
    371   /// assign the appropriate value to LM and return true.
    372   static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
    373 };
    374 
    375 } // end analyze_format_string namespace
    376 
    377 //===----------------------------------------------------------------------===//
    378 /// Pieces specific to fprintf format strings.
    379 
    380 namespace analyze_printf {
    381 
    382 class PrintfConversionSpecifier :
    383   public analyze_format_string::ConversionSpecifier  {
    384 public:
    385   PrintfConversionSpecifier()
    386     : ConversionSpecifier(true, 0, InvalidSpecifier) {}
    387 
    388   PrintfConversionSpecifier(const char *pos, Kind k)
    389     : ConversionSpecifier(true, pos, k) {}
    390 
    391   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
    392   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
    393                                     kind <= DoubleArgEnd; }
    394   unsigned getLength() const {
    395       // Conversion specifiers currently only are represented by
    396       // single characters, but we be flexible.
    397     return 1;
    398   }
    399 
    400   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    401     return CS->isPrintfKind();
    402   }
    403 };
    404 
// Make the shared analyze_format_string helper types usable unqualified
// inside this namespace.
using analyze_format_string::ArgType;
using analyze_format_string::LengthModifier;
using analyze_format_string::OptionalAmount;
using analyze_format_string::OptionalFlag;
    409 
    410 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
    411   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
    412   OptionalFlag IsLeftJustified; // '-'
    413   OptionalFlag HasPlusPrefix; // '+'
    414   OptionalFlag HasSpacePrefix; // ' '
    415   OptionalFlag HasAlternativeForm; // '#'
    416   OptionalFlag HasLeadingZeroes; // '0'
    417   OptionalAmount Precision;
    418 public:
    419   PrintfSpecifier() :
    420     FormatSpecifier(/* isPrintf = */ true),
    421     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
    422     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
    423 
    424   static PrintfSpecifier Parse(const char *beg, const char *end);
    425 
    426     // Methods for incrementally constructing the PrintfSpecifier.
    427   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
    428     CS = cs;
    429   }
    430   void setHasThousandsGrouping(const char *position) {
    431     HasThousandsGrouping = true;
    432     HasThousandsGrouping.setPosition(position);
    433   }
    434   void setIsLeftJustified(const char *position) {
    435     IsLeftJustified = true;
    436     IsLeftJustified.setPosition(position);
    437   }
    438   void setHasPlusPrefix(const char *position) {
    439     HasPlusPrefix = true;
    440     HasPlusPrefix.setPosition(position);
    441   }
    442   void setHasSpacePrefix(const char *position) {
    443     HasSpacePrefix = true;
    444     HasSpacePrefix.setPosition(position);
    445   }
    446   void setHasAlternativeForm(const char *position) {
    447     HasAlternativeForm = true;
    448     HasAlternativeForm.setPosition(position);
    449   }
    450   void setHasLeadingZeros(const char *position) {
    451     HasLeadingZeroes = true;
    452     HasLeadingZeroes.setPosition(position);
    453   }
    454   void setUsesPositionalArg() { UsesPositionalArg = true; }
    455 
    456     // Methods for querying the format specifier.
    457 
    458   const PrintfConversionSpecifier &getConversionSpecifier() const {
    459     return cast<PrintfConversionSpecifier>(CS);
    460   }
    461 
    462   void setPrecision(const OptionalAmount &Amt) {
    463     Precision = Amt;
    464     Precision.setUsesDotPrefix();
    465   }
    466 
    467   const OptionalAmount &getPrecision() const {
    468     return Precision;
    469   }
    470 
    471   bool consumesDataArgument() const {
    472     return getConversionSpecifier().consumesDataArgument();
    473   }
    474 
    475   /// \brief Returns the builtin type that a data argument
    476   /// paired with this format specifier should have.  This method
    477   /// will return null if the format specifier does not have
    478   /// a matching data argument or the matching argument matches
    479   /// more than one type.
    480   ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
    481 
    482   const OptionalFlag &hasThousandsGrouping() const {
    483       return HasThousandsGrouping;
    484   }
    485   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
    486   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
    487   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
    488   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
    489   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
    490   bool usesPositionalArg() const { return UsesPositionalArg; }
    491 
    492   /// Changes the specifier and length according to a QualType, retaining any
    493   /// flags or options. Returns true on success, or false when a conversion
    494   /// was not successful.
    495   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
    496                bool IsObjCLiteral);
    497 
    498   void toString(raw_ostream &os) const;
    499 
    500   // Validation methods - to check if any element results in undefined behavior
    501   bool hasValidPlusPrefix() const;
    502   bool hasValidAlternativeForm() const;
    503   bool hasValidLeadingZeros() const;
    504   bool hasValidSpacePrefix() const;
    505   bool hasValidLeftJustified() const;
    506   bool hasValidThousandsGroupingPrefix() const;
    507 
    508   bool hasValidPrecision() const;
    509   bool hasValidFieldWidth() const;
    510 };
    511 }  // end analyze_printf namespace
    512 
    513 //===----------------------------------------------------------------------===//
    514 /// Pieces specific to fscanf format strings.
    515 
    516 namespace analyze_scanf {
    517 
    518 class ScanfConversionSpecifier :
    519     public analyze_format_string::ConversionSpecifier  {
    520 public:
    521   ScanfConversionSpecifier()
    522     : ConversionSpecifier(false, 0, InvalidSpecifier) {}
    523 
    524   ScanfConversionSpecifier(const char *pos, Kind k)
    525     : ConversionSpecifier(false, pos, k) {}
    526 
    527   void setEndScanList(const char *pos) { EndScanList = pos; }
    528 
    529   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    530     return !CS->isPrintfKind();
    531   }
    532 };
    533 
// Make the shared analyze_format_string helper types usable unqualified
// inside this namespace.
using analyze_format_string::ArgType;
using analyze_format_string::LengthModifier;
using analyze_format_string::OptionalAmount;
using analyze_format_string::OptionalFlag;
    538 
    539 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
    540   OptionalFlag SuppressAssignment; // '*'
    541 public:
    542   ScanfSpecifier() :
    543     FormatSpecifier(/* isPrintf = */ false),
    544     SuppressAssignment("*") {}
    545 
    546   void setSuppressAssignment(const char *position) {
    547     SuppressAssignment = true;
    548     SuppressAssignment.setPosition(position);
    549   }
    550 
    551   const OptionalFlag &getSuppressAssignment() const {
    552     return SuppressAssignment;
    553   }
    554 
    555   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
    556     CS = cs;
    557   }
    558 
    559   const ScanfConversionSpecifier &getConversionSpecifier() const {
    560     return cast<ScanfConversionSpecifier>(CS);
    561   }
    562 
    563   bool consumesDataArgument() const {
    564     return CS.consumesDataArgument() && !SuppressAssignment;
    565   }
    566 
    567   ArgType getArgType(ASTContext &Ctx) const;
    568 
    569   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
    570 
    571   void toString(raw_ostream &os) const;
    572 
    573   static ScanfSpecifier Parse(const char *beg, const char *end);
    574 };
    575 
    576 } // end analyze_scanf namespace
    577 
    578 //===----------------------------------------------------------------------===//
    579 // Parsing and processing of format strings (both fprintf and fscanf).
    580 
    581 namespace analyze_format_string {
    582 
    583 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
    584 
    585 class FormatStringHandler {
    586 public:
    587   FormatStringHandler() {}
    588   virtual ~FormatStringHandler();
    589 
    590   virtual void HandleNullChar(const char *nullCharacter) {}
    591 
    592   virtual void HandlePosition(const char *startPos, unsigned posLen) {}
    593 
    594   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
    595                                      PositionContext p) {}
    596 
    597   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
    598 
    599   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
    600                                          unsigned specifierLen) {}
    601 
    602   // Printf-specific handlers.
    603 
    604   virtual bool HandleInvalidPrintfConversionSpecifier(
    605                                       const analyze_printf::PrintfSpecifier &FS,
    606                                       const char *startSpecifier,
    607                                       unsigned specifierLen) {
    608     return true;
    609   }
    610 
    611   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
    612                                      const char *startSpecifier,
    613                                      unsigned specifierLen) {
    614     return true;
    615   }
    616 
    617     // Scanf-specific handlers.
    618 
    619   virtual bool HandleInvalidScanfConversionSpecifier(
    620                                         const analyze_scanf::ScanfSpecifier &FS,
    621                                         const char *startSpecifier,
    622                                         unsigned specifierLen) {
    623     return true;
    624   }
    625 
    626   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
    627                                     const char *startSpecifier,
    628                                     unsigned specifierLen) {
    629     return true;
    630   }
    631 
    632   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
    633 };
    634 
// Entry point for printf-style format strings; only declared here, the
// definition lives elsewhere.
// NOTE(review): presumably processes the characters between beg and end and
// reports through H; the meaning of the bool result is not visible in this
// header -- confirm against the definition.
bool ParsePrintfString(FormatStringHandler &H,
                       const char *beg, const char *end, const LangOptions &LO,
                       const TargetInfo &Target);

// Entry point for scanf-style format strings, with the same parameter list
// as ParsePrintfString; only declared here.
// NOTE(review): the meaning of the bool result is not visible in this
// header -- confirm against the definition.
bool ParseScanfString(FormatStringHandler &H,
                      const char *beg, const char *end, const LangOptions &LO,
                      const TargetInfo &Target);
    642 
    643 } // end analyze_format_string namespace
    644 } // end clang namespace
    645 #endif
    646