Home | History | Annotate | Download | only in Analyses
      1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines APIs for analyzing the format strings of printf, fscanf,
     11 // and friends.
     12 //
// The structure of format strings for fprintf is described in C99 7.19.6.1.
     14 //
// The structure of format strings for fscanf is described in C99 7.19.6.2.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #ifndef LLVM_CLANG_FORMAT_H
     20 #define LLVM_CLANG_FORMAT_H
     21 
     22 #include "clang/AST/CanonicalType.h"
     23 
     24 namespace clang {
     25 
     26 class TargetInfo;
     27 
     28 //===----------------------------------------------------------------------===//
     29 /// Common components of both fprintf and fscanf format strings.
     30 namespace analyze_format_string {
     31 
     32 /// Class representing optional flags with location and representation
     33 /// information.
     34 class OptionalFlag {
     35 public:
     36   OptionalFlag(const char *Representation)
     37       : representation(Representation), flag(false) {}
     38   bool isSet() { return flag; }
     39   void set() { flag = true; }
     40   void clear() { flag = false; }
     41   void setPosition(const char *position) {
     42     assert(position);
     43     this->position = position;
     44   }
     45   const char *getPosition() const {
     46     assert(position);
     47     return position;
     48   }
     49   const char *toString() const { return representation; }
     50 
     51   // Overloaded operators for bool like qualities
     52   LLVM_EXPLICIT operator bool() const { return flag; }
     53   OptionalFlag& operator=(const bool &rhs) {
     54     flag = rhs;
     55     return *this;  // Return a reference to myself.
     56   }
     57 private:
     58   const char *representation;
     59   const char *position;
     60   bool flag;
     61 };
     62 
     63 /// Represents the length modifier in a format string in scanf/printf.
     64 class LengthModifier {
     65 public:
     66   enum Kind {
     67     None,
     68     AsChar,       // 'hh'
     69     AsShort,      // 'h'
     70     AsLong,       // 'l'
     71     AsLongLong,   // 'll'
     72     AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
     73     AsIntMax,     // 'j'
     74     AsSizeT,      // 'z'
     75     AsPtrDiff,    // 't'
     76     AsInt32,      // 'I32' (MSVCRT, like __int32)
     77     AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
     78     AsInt64,      // 'I64' (MSVCRT, like __int64)
     79     AsLongDouble, // 'L'
     80     AsAllocate,   // for '%as', GNU extension to C90 scanf
     81     AsMAllocate,  // for '%ms', GNU extension to scanf
     82     AsWideChar = AsLong // for '%ls', only makes sense for printf
     83   };
     84 
     85   LengthModifier()
     86     : Position(nullptr), kind(None) {}
     87   LengthModifier(const char *pos, Kind k)
     88     : Position(pos), kind(k) {}
     89 
     90   const char *getStart() const {
     91     return Position;
     92   }
     93 
     94   unsigned getLength() const {
     95     switch (kind) {
     96       default:
     97         return 1;
     98       case AsLongLong:
     99       case AsChar:
    100         return 2;
    101       case AsInt32:
    102       case AsInt64:
    103         return 3;
    104       case None:
    105         return 0;
    106     }
    107   }
    108 
    109   Kind getKind() const { return kind; }
    110   void setKind(Kind k) { kind = k; }
    111 
    112   const char *toString() const;
    113 
    114 private:
    115   const char *Position;
    116   Kind kind;
    117 };
    118 
    119 class ConversionSpecifier {
    120 public:
    121   enum Kind {
    122     InvalidSpecifier = 0,
    123       // C99 conversion specifiers.
    124     cArg,
    125     dArg,
    126     DArg, // Apple extension
    127     iArg,
    128     IntArgBeg = dArg, IntArgEnd = iArg,
    129 
    130     oArg,
    131     OArg, // Apple extension
    132     uArg,
    133     UArg, // Apple extension
    134     xArg,
    135     XArg,
    136     UIntArgBeg = oArg, UIntArgEnd = XArg,
    137 
    138     fArg,
    139     FArg,
    140     eArg,
    141     EArg,
    142     gArg,
    143     GArg,
    144     aArg,
    145     AArg,
    146     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
    147 
    148     sArg,
    149     pArg,
    150     nArg,
    151     PercentArg,
    152     CArg,
    153     SArg,
    154 
    155     // ** Printf-specific **
    156 
    157     // Objective-C specific specifiers.
    158     ObjCObjArg,  // '@'
    159     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
    160 
    161     // GlibC specific specifiers.
    162     PrintErrno,   // 'm'
    163 
    164     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
    165 
    166     // ** Scanf-specific **
    167     ScanListArg, // '['
    168     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
    169   };
    170 
    171   ConversionSpecifier(bool isPrintf = true)
    172     : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
    173       kind(InvalidSpecifier) {}
    174 
    175   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
    176     : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
    177 
    178   const char *getStart() const {
    179     return Position;
    180   }
    181 
    182   StringRef getCharacters() const {
    183     return StringRef(getStart(), getLength());
    184   }
    185 
    186   bool consumesDataArgument() const {
    187     switch (kind) {
    188       case PrintErrno:
    189         assert(IsPrintf);
    190         return false;
    191       case PercentArg:
    192         return false;
    193       default:
    194         return true;
    195     }
    196   }
    197 
    198   Kind getKind() const { return kind; }
    199   void setKind(Kind k) { kind = k; }
    200   unsigned getLength() const {
    201     return EndScanList ? EndScanList - Position : 1;
    202   }
    203 
    204   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
    205   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
    206   bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
    207   const char *toString() const;
    208 
    209   bool isPrintfKind() const { return IsPrintf; }
    210 
    211   Optional<ConversionSpecifier> getStandardSpecifier() const;
    212 
    213 protected:
    214   bool IsPrintf;
    215   const char *Position;
    216   const char *EndScanList;
    217   Kind kind;
    218 };
    219 
    220 class ArgType {
    221 public:
    222   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
    223               AnyCharTy, CStrTy, WCStrTy, WIntTy };
    224 private:
    225   const Kind K;
    226   QualType T;
    227   const char *Name;
    228   bool Ptr;
    229 public:
    230   ArgType(Kind k = UnknownTy, const char *n = nullptr)
    231       : K(k), Name(n), Ptr(false) {}
    232   ArgType(QualType t, const char *n = nullptr)
    233       : K(SpecificTy), T(t), Name(n), Ptr(false) {}
    234   ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
    235 
    236   static ArgType Invalid() { return ArgType(InvalidTy); }
    237   bool isValid() const { return K != InvalidTy; }
    238 
    239   /// Create an ArgType which corresponds to the type pointer to A.
    240   static ArgType PtrTo(const ArgType& A) {
    241     assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
    242     ArgType Res = A;
    243     Res.Ptr = true;
    244     return Res;
    245   }
    246 
    247   bool matchesType(ASTContext &C, QualType argTy) const;
    248 
    249   QualType getRepresentativeType(ASTContext &C) const;
    250 
    251   std::string getRepresentativeTypeName(ASTContext &C) const;
    252 };
    253 
    254 class OptionalAmount {
    255 public:
    256   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
    257 
    258   OptionalAmount(HowSpecified howSpecified,
    259                  unsigned amount,
    260                  const char *amountStart,
    261                  unsigned amountLength,
    262                  bool usesPositionalArg)
    263   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
    264   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
    265 
    266   OptionalAmount(bool valid = true)
    267   : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
    268   UsesPositionalArg(0), UsesDotPrefix(0) {}
    269 
    270   bool isInvalid() const {
    271     return hs == Invalid;
    272   }
    273 
    274   HowSpecified getHowSpecified() const { return hs; }
    275   void setHowSpecified(HowSpecified h) { hs = h; }
    276 
    277   bool hasDataArgument() const { return hs == Arg; }
    278 
    279   unsigned getArgIndex() const {
    280     assert(hasDataArgument());
    281     return amt;
    282   }
    283 
    284   unsigned getConstantAmount() const {
    285     assert(hs == Constant);
    286     return amt;
    287   }
    288 
    289   const char *getStart() const {
    290       // We include the . character if it is given.
    291     return start - UsesDotPrefix;
    292   }
    293 
    294   unsigned getConstantLength() const {
    295     assert(hs == Constant);
    296     return length + UsesDotPrefix;
    297   }
    298 
    299   ArgType getArgType(ASTContext &Ctx) const;
    300 
    301   void toString(raw_ostream &os) const;
    302 
    303   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
    304   unsigned getPositionalArgIndex() const {
    305     assert(hasDataArgument());
    306     return amt + 1;
    307   }
    308 
    309   bool usesDotPrefix() const { return UsesDotPrefix; }
    310   void setUsesDotPrefix() { UsesDotPrefix = true; }
    311 
    312 private:
    313   const char *start;
    314   unsigned length;
    315   HowSpecified hs;
    316   unsigned amt;
    317   bool UsesPositionalArg : 1;
    318   bool UsesDotPrefix;
    319 };
    320 
    321 
    322 class FormatSpecifier {
    323 protected:
    324   LengthModifier LM;
    325   OptionalAmount FieldWidth;
    326   ConversionSpecifier CS;
    327   /// Positional arguments, an IEEE extension:
    328   ///  IEEE Std 1003.1, 2004 Edition
    329   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
    330   bool UsesPositionalArg;
    331   unsigned argIndex;
    332 public:
    333   FormatSpecifier(bool isPrintf)
    334     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
    335 
    336   void setLengthModifier(LengthModifier lm) {
    337     LM = lm;
    338   }
    339 
    340   void setUsesPositionalArg() { UsesPositionalArg = true; }
    341 
    342   void setArgIndex(unsigned i) {
    343     argIndex = i;
    344   }
    345 
    346   unsigned getArgIndex() const {
    347     return argIndex;
    348   }
    349 
    350   unsigned getPositionalArgIndex() const {
    351     return argIndex + 1;
    352   }
    353 
    354   const LengthModifier &getLengthModifier() const {
    355     return LM;
    356   }
    357 
    358   const OptionalAmount &getFieldWidth() const {
    359     return FieldWidth;
    360   }
    361 
    362   void setFieldWidth(const OptionalAmount &Amt) {
    363     FieldWidth = Amt;
    364   }
    365 
    366   bool usesPositionalArg() const { return UsesPositionalArg; }
    367 
    368   bool hasValidLengthModifier(const TargetInfo &Target) const;
    369 
    370   bool hasStandardLengthModifier() const;
    371 
    372   Optional<LengthModifier> getCorrectedLengthModifier() const;
    373 
    374   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
    375 
    376   bool hasStandardLengthConversionCombination() const;
    377 
    378   /// For a TypedefType QT, if it is a named integer type such as size_t,
    379   /// assign the appropriate value to LM and return true.
    380   static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
    381 };
    382 
    383 } // end analyze_format_string namespace
    384 
    385 //===----------------------------------------------------------------------===//
    386 /// Pieces specific to fprintf format strings.
    387 
    388 namespace analyze_printf {
    389 
    390 class PrintfConversionSpecifier :
    391   public analyze_format_string::ConversionSpecifier  {
    392 public:
    393   PrintfConversionSpecifier()
    394     : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
    395 
    396   PrintfConversionSpecifier(const char *pos, Kind k)
    397     : ConversionSpecifier(true, pos, k) {}
    398 
    399   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
    400   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
    401                                     kind <= DoubleArgEnd; }
    402   unsigned getLength() const {
    403       // Conversion specifiers currently only are represented by
    404       // single characters, but we be flexible.
    405     return 1;
    406   }
    407 
    408   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    409     return CS->isPrintfKind();
    410   }
    411 };
    412 
    413 using analyze_format_string::ArgType;
    414 using analyze_format_string::LengthModifier;
    415 using analyze_format_string::OptionalAmount;
    416 using analyze_format_string::OptionalFlag;
    417 
    418 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
    419   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
    420   OptionalFlag IsLeftJustified; // '-'
    421   OptionalFlag HasPlusPrefix; // '+'
    422   OptionalFlag HasSpacePrefix; // ' '
    423   OptionalFlag HasAlternativeForm; // '#'
    424   OptionalFlag HasLeadingZeroes; // '0'
    425   OptionalAmount Precision;
    426 public:
    427   PrintfSpecifier() :
    428     FormatSpecifier(/* isPrintf = */ true),
    429     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
    430     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
    431 
    432   static PrintfSpecifier Parse(const char *beg, const char *end);
    433 
    434     // Methods for incrementally constructing the PrintfSpecifier.
    435   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
    436     CS = cs;
    437   }
    438   void setHasThousandsGrouping(const char *position) {
    439     HasThousandsGrouping = true;
    440     HasThousandsGrouping.setPosition(position);
    441   }
    442   void setIsLeftJustified(const char *position) {
    443     IsLeftJustified = true;
    444     IsLeftJustified.setPosition(position);
    445   }
    446   void setHasPlusPrefix(const char *position) {
    447     HasPlusPrefix = true;
    448     HasPlusPrefix.setPosition(position);
    449   }
    450   void setHasSpacePrefix(const char *position) {
    451     HasSpacePrefix = true;
    452     HasSpacePrefix.setPosition(position);
    453   }
    454   void setHasAlternativeForm(const char *position) {
    455     HasAlternativeForm = true;
    456     HasAlternativeForm.setPosition(position);
    457   }
    458   void setHasLeadingZeros(const char *position) {
    459     HasLeadingZeroes = true;
    460     HasLeadingZeroes.setPosition(position);
    461   }
    462   void setUsesPositionalArg() { UsesPositionalArg = true; }
    463 
    464     // Methods for querying the format specifier.
    465 
    466   const PrintfConversionSpecifier &getConversionSpecifier() const {
    467     return cast<PrintfConversionSpecifier>(CS);
    468   }
    469 
    470   void setPrecision(const OptionalAmount &Amt) {
    471     Precision = Amt;
    472     Precision.setUsesDotPrefix();
    473   }
    474 
    475   const OptionalAmount &getPrecision() const {
    476     return Precision;
    477   }
    478 
    479   bool consumesDataArgument() const {
    480     return getConversionSpecifier().consumesDataArgument();
    481   }
    482 
    483   /// \brief Returns the builtin type that a data argument
    484   /// paired with this format specifier should have.  This method
    485   /// will return null if the format specifier does not have
    486   /// a matching data argument or the matching argument matches
    487   /// more than one type.
    488   ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
    489 
    490   const OptionalFlag &hasThousandsGrouping() const {
    491       return HasThousandsGrouping;
    492   }
    493   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
    494   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
    495   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
    496   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
    497   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
    498   bool usesPositionalArg() const { return UsesPositionalArg; }
    499 
    500   /// Changes the specifier and length according to a QualType, retaining any
    501   /// flags or options. Returns true on success, or false when a conversion
    502   /// was not successful.
    503   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
    504                bool IsObjCLiteral);
    505 
    506   void toString(raw_ostream &os) const;
    507 
    508   // Validation methods - to check if any element results in undefined behavior
    509   bool hasValidPlusPrefix() const;
    510   bool hasValidAlternativeForm() const;
    511   bool hasValidLeadingZeros() const;
    512   bool hasValidSpacePrefix() const;
    513   bool hasValidLeftJustified() const;
    514   bool hasValidThousandsGroupingPrefix() const;
    515 
    516   bool hasValidPrecision() const;
    517   bool hasValidFieldWidth() const;
    518 };
    519 }  // end analyze_printf namespace
    520 
    521 //===----------------------------------------------------------------------===//
    522 /// Pieces specific to fscanf format strings.
    523 
    524 namespace analyze_scanf {
    525 
    526 class ScanfConversionSpecifier :
    527     public analyze_format_string::ConversionSpecifier  {
    528 public:
    529   ScanfConversionSpecifier()
    530     : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
    531 
    532   ScanfConversionSpecifier(const char *pos, Kind k)
    533     : ConversionSpecifier(false, pos, k) {}
    534 
    535   void setEndScanList(const char *pos) { EndScanList = pos; }
    536 
    537   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    538     return !CS->isPrintfKind();
    539   }
    540 };
    541 
    542 using analyze_format_string::ArgType;
    543 using analyze_format_string::LengthModifier;
    544 using analyze_format_string::OptionalAmount;
    545 using analyze_format_string::OptionalFlag;
    546 
    547 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
    548   OptionalFlag SuppressAssignment; // '*'
    549 public:
    550   ScanfSpecifier() :
    551     FormatSpecifier(/* isPrintf = */ false),
    552     SuppressAssignment("*") {}
    553 
    554   void setSuppressAssignment(const char *position) {
    555     SuppressAssignment = true;
    556     SuppressAssignment.setPosition(position);
    557   }
    558 
    559   const OptionalFlag &getSuppressAssignment() const {
    560     return SuppressAssignment;
    561   }
    562 
    563   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
    564     CS = cs;
    565   }
    566 
    567   const ScanfConversionSpecifier &getConversionSpecifier() const {
    568     return cast<ScanfConversionSpecifier>(CS);
    569   }
    570 
    571   bool consumesDataArgument() const {
    572     return CS.consumesDataArgument() && !SuppressAssignment;
    573   }
    574 
    575   ArgType getArgType(ASTContext &Ctx) const;
    576 
    577   bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
    578                ASTContext &Ctx);
    579 
    580   void toString(raw_ostream &os) const;
    581 
    582   static ScanfSpecifier Parse(const char *beg, const char *end);
    583 };
    584 
    585 } // end analyze_scanf namespace
    586 
    587 //===----------------------------------------------------------------------===//
    588 // Parsing and processing of format strings (both fprintf and fscanf).
    589 
    590 namespace analyze_format_string {
    591 
    592 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
    593 
    594 class FormatStringHandler {
    595 public:
    596   FormatStringHandler() {}
    597   virtual ~FormatStringHandler();
    598 
    599   virtual void HandleNullChar(const char *nullCharacter) {}
    600 
    601   virtual void HandlePosition(const char *startPos, unsigned posLen) {}
    602 
    603   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
    604                                      PositionContext p) {}
    605 
    606   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
    607 
    608   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
    609                                          unsigned specifierLen) {}
    610 
    611   // Printf-specific handlers.
    612 
    613   virtual bool HandleInvalidPrintfConversionSpecifier(
    614                                       const analyze_printf::PrintfSpecifier &FS,
    615                                       const char *startSpecifier,
    616                                       unsigned specifierLen) {
    617     return true;
    618   }
    619 
    620   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
    621                                      const char *startSpecifier,
    622                                      unsigned specifierLen) {
    623     return true;
    624   }
    625 
    626     // Scanf-specific handlers.
    627 
    628   virtual bool HandleInvalidScanfConversionSpecifier(
    629                                         const analyze_scanf::ScanfSpecifier &FS,
    630                                         const char *startSpecifier,
    631                                         unsigned specifierLen) {
    632     return true;
    633   }
    634 
    635   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
    636                                     const char *startSpecifier,
    637                                     unsigned specifierLen) {
    638     return true;
    639   }
    640 
    641   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
    642 };
    643 
    644 bool ParsePrintfString(FormatStringHandler &H,
    645                        const char *beg, const char *end, const LangOptions &LO,
    646                        const TargetInfo &Target);
    647 
    648 bool ParseScanfString(FormatStringHandler &H,
    649                       const char *beg, const char *end, const LangOptions &LO,
    650                       const TargetInfo &Target);
    651 
    652 } // end analyze_format_string namespace
    653 } // end clang namespace
    654 #endif
    655