Home | History | Annotate | Download | only in Analyses
      1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines APIs for analyzing the format strings of printf, fscanf,
     11 // and friends.
     12 //
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #ifndef LLVM_CLANG_FORMAT_H
     20 #define LLVM_CLANG_FORMAT_H
     21 
     22 #include "clang/AST/CanonicalType.h"
     23 
     24 namespace clang {
     25 
     26 //===----------------------------------------------------------------------===//
     27 /// Common components of both fprintf and fscanf format strings.
     28 namespace analyze_format_string {
     29 
     30 /// Class representing optional flags with location and representation
     31 /// information.
     32 class OptionalFlag {
     33 public:
     34   OptionalFlag(const char *Representation)
     35       : representation(Representation), flag(false) {}
     36   bool isSet() { return flag; }
     37   void set() { flag = true; }
     38   void clear() { flag = false; }
     39   void setPosition(const char *position) {
     40     assert(position);
     41     this->position = position;
     42   }
     43   const char *getPosition() const {
     44     assert(position);
     45     return position;
     46   }
     47   const char *toString() const { return representation; }
     48 
     49   // Overloaded operators for bool like qualities
     50   operator bool() const { return flag; }
     51   OptionalFlag& operator=(const bool &rhs) {
     52     flag = rhs;
     53     return *this;  // Return a reference to myself.
     54   }
     55 private:
     56   const char *representation;
     57   const char *position;
     58   bool flag;
     59 };
     60 
     61 /// Represents the length modifier in a format string in scanf/printf.
     62 class LengthModifier {
     63 public:
     64   enum Kind {
     65     None,
     66     AsChar,       // 'hh'
     67     AsShort,      // 'h'
     68     AsLong,       // 'l'
     69     AsLongLong,   // 'll', 'q' (BSD, deprecated)
     70     AsIntMax,     // 'j'
     71     AsSizeT,      // 'z'
     72     AsPtrDiff,    // 't'
     73     AsLongDouble, // 'L'
     74     AsWideChar = AsLong // for '%ls', only makes sense for printf
     75   };
     76 
     77   LengthModifier()
     78     : Position(0), kind(None) {}
     79   LengthModifier(const char *pos, Kind k)
     80     : Position(pos), kind(k) {}
     81 
     82   const char *getStart() const {
     83     return Position;
     84   }
     85 
     86   unsigned getLength() const {
     87     switch (kind) {
     88       default:
     89         return 1;
     90       case AsLongLong:
     91       case AsChar:
     92         return 2;
     93       case None:
     94         return 0;
     95     }
     96   }
     97 
     98   Kind getKind() const { return kind; }
     99   void setKind(Kind k) { kind = k; }
    100 
    101   const char *toString() const;
    102 
    103 private:
    104   const char *Position;
    105   Kind kind;
    106 };
    107 
    108 class ConversionSpecifier {
    109 public:
    110   enum Kind {
    111     InvalidSpecifier = 0,
    112       // C99 conversion specifiers.
    113     cArg,
    114     dArg,
    115     iArg,
    116     IntArgBeg = cArg, IntArgEnd = iArg,
    117 
    118     oArg,
    119     uArg,
    120     xArg,
    121     XArg,
    122     UIntArgBeg = oArg, UIntArgEnd = XArg,
    123 
    124     fArg,
    125     FArg,
    126     eArg,
    127     EArg,
    128     gArg,
    129     GArg,
    130     aArg,
    131     AArg,
    132     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
    133 
    134     sArg,
    135     pArg,
    136     nArg,
    137     PercentArg,
    138     CArg,
    139     SArg,
    140 
    141     // ** Printf-specific **
    142 
    143     // Objective-C specific specifiers.
    144     ObjCObjArg,  // '@'
    145     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
    146 
    147     // GlibC specific specifiers.
    148     PrintErrno,   // 'm'
    149 
    150     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
    151 
    152     // ** Scanf-specific **
    153     ScanListArg, // '['
    154     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
    155   };
    156 
    157   ConversionSpecifier(bool isPrintf)
    158     : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
    159 
    160   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
    161     : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
    162 
    163   const char *getStart() const {
    164     return Position;
    165   }
    166 
    167   llvm::StringRef getCharacters() const {
    168     return llvm::StringRef(getStart(), getLength());
    169   }
    170 
    171   bool consumesDataArgument() const {
    172     switch (kind) {
    173       case PrintErrno:
    174         assert(IsPrintf);
    175       case PercentArg:
    176         return false;
    177       default:
    178         return true;
    179     }
    180   }
    181 
    182   Kind getKind() const { return kind; }
    183   void setKind(Kind k) { kind = k; }
    184   unsigned getLength() const {
    185     return EndScanList ? EndScanList - Position : 1;
    186   }
    187 
    188   const char *toString() const;
    189 
    190   bool isPrintfKind() const { return IsPrintf; }
    191 
    192 protected:
    193   bool IsPrintf;
    194   const char *Position;
    195   const char *EndScanList;
    196   Kind kind;
    197 };
    198 
    199 class ArgTypeResult {
    200 public:
    201   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
    202     CStrTy, WCStrTy, WIntTy };
    203 private:
    204   const Kind K;
    205   QualType T;
    206   ArgTypeResult(bool) : K(InvalidTy) {}
    207 public:
    208   ArgTypeResult(Kind k = UnknownTy) : K(k) {}
    209   ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
    210   ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
    211 
    212   static ArgTypeResult Invalid() { return ArgTypeResult(true); }
    213 
    214   bool isValid() const { return K != InvalidTy; }
    215 
    216   const QualType *getSpecificType() const {
    217     return K == SpecificTy ? &T : 0;
    218   }
    219 
    220   bool matchesType(ASTContext &C, QualType argTy) const;
    221 
    222   bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
    223 
    224   QualType getRepresentativeType(ASTContext &C) const;
    225 };
    226 
    227 class OptionalAmount {
    228 public:
    229   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
    230 
    231   OptionalAmount(HowSpecified howSpecified,
    232                  unsigned amount,
    233                  const char *amountStart,
    234                  unsigned amountLength,
    235                  bool usesPositionalArg)
    236   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
    237   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
    238 
    239   OptionalAmount(bool valid = true)
    240   : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
    241   UsesPositionalArg(0), UsesDotPrefix(0) {}
    242 
    243   bool isInvalid() const {
    244     return hs == Invalid;
    245   }
    246 
    247   HowSpecified getHowSpecified() const { return hs; }
    248   void setHowSpecified(HowSpecified h) { hs = h; }
    249 
    250   bool hasDataArgument() const { return hs == Arg; }
    251 
    252   unsigned getArgIndex() const {
    253     assert(hasDataArgument());
    254     return amt;
    255   }
    256 
    257   unsigned getConstantAmount() const {
    258     assert(hs == Constant);
    259     return amt;
    260   }
    261 
    262   const char *getStart() const {
    263       // We include the . character if it is given.
    264     return start - UsesDotPrefix;
    265   }
    266 
    267   unsigned getConstantLength() const {
    268     assert(hs == Constant);
    269     return length + UsesDotPrefix;
    270   }
    271 
    272   ArgTypeResult getArgType(ASTContext &Ctx) const;
    273 
    274   void toString(llvm::raw_ostream &os) const;
    275 
    276   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
    277   unsigned getPositionalArgIndex() const {
    278     assert(hasDataArgument());
    279     return amt + 1;
    280   }
    281 
    282   bool usesDotPrefix() const { return UsesDotPrefix; }
    283   void setUsesDotPrefix() { UsesDotPrefix = true; }
    284 
    285 private:
    286   const char *start;
    287   unsigned length;
    288   HowSpecified hs;
    289   unsigned amt;
    290   bool UsesPositionalArg : 1;
    291   bool UsesDotPrefix;
    292 };
    293 
    294 
    295 class FormatSpecifier {
    296 protected:
    297   LengthModifier LM;
    298   OptionalAmount FieldWidth;
    299   ConversionSpecifier CS;
    300     /// Positional arguments, an IEEE extension:
    301     ///  IEEE Std 1003.1, 2004 Edition
    302     ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
    303   bool UsesPositionalArg;
    304   unsigned argIndex;
    305 public:
    306   FormatSpecifier(bool isPrintf)
    307     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
    308 
    309   void setLengthModifier(LengthModifier lm) {
    310     LM = lm;
    311   }
    312 
    313   void setUsesPositionalArg() { UsesPositionalArg = true; }
    314 
    315   void setArgIndex(unsigned i) {
    316     argIndex = i;
    317   }
    318 
    319   unsigned getArgIndex() const {
    320     return argIndex;
    321   }
    322 
    323   unsigned getPositionalArgIndex() const {
    324     return argIndex + 1;
    325   }
    326 
    327   const LengthModifier &getLengthModifier() const {
    328     return LM;
    329   }
    330 
    331   const OptionalAmount &getFieldWidth() const {
    332     return FieldWidth;
    333   }
    334 
    335   void setFieldWidth(const OptionalAmount &Amt) {
    336     FieldWidth = Amt;
    337   }
    338 
    339   bool usesPositionalArg() const { return UsesPositionalArg; }
    340 
    341   bool hasValidLengthModifier() const;
    342 };
    343 
    344 } // end analyze_format_string namespace
    345 
    346 //===----------------------------------------------------------------------===//
    347 /// Pieces specific to fprintf format strings.
    348 
    349 namespace analyze_printf {
    350 
    351 class PrintfConversionSpecifier :
    352   public analyze_format_string::ConversionSpecifier  {
    353 public:
    354   PrintfConversionSpecifier()
    355     : ConversionSpecifier(true, 0, InvalidSpecifier) {}
    356 
    357   PrintfConversionSpecifier(const char *pos, Kind k)
    358     : ConversionSpecifier(true, pos, k) {}
    359 
    360   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
    361   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
    362   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
    363   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
    364                                     kind <= DoubleArgBeg; }
    365   unsigned getLength() const {
    366       // Conversion specifiers currently only are represented by
    367       // single characters, but we be flexible.
    368     return 1;
    369   }
    370 
    371   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    372     return CS->isPrintfKind();
    373   }
    374 };
    375 
    376 using analyze_format_string::ArgTypeResult;
    377 using analyze_format_string::LengthModifier;
    378 using analyze_format_string::OptionalAmount;
    379 using analyze_format_string::OptionalFlag;
    380 
    381 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
    382   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
    383   OptionalFlag IsLeftJustified; // '-'
    384   OptionalFlag HasPlusPrefix; // '+'
    385   OptionalFlag HasSpacePrefix; // ' '
    386   OptionalFlag HasAlternativeForm; // '#'
    387   OptionalFlag HasLeadingZeroes; // '0'
    388   OptionalAmount Precision;
    389 public:
    390   PrintfSpecifier() :
    391     FormatSpecifier(/* isPrintf = */ true),
    392     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
    393     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
    394 
    395   static PrintfSpecifier Parse(const char *beg, const char *end);
    396 
    397     // Methods for incrementally constructing the PrintfSpecifier.
    398   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
    399     CS = cs;
    400   }
    401   void setHasThousandsGrouping(const char *position) {
    402     HasThousandsGrouping = true;
    403     HasThousandsGrouping.setPosition(position);
    404   }
    405   void setIsLeftJustified(const char *position) {
    406     IsLeftJustified = true;
    407     IsLeftJustified.setPosition(position);
    408   }
    409   void setHasPlusPrefix(const char *position) {
    410     HasPlusPrefix = true;
    411     HasPlusPrefix.setPosition(position);
    412   }
    413   void setHasSpacePrefix(const char *position) {
    414     HasSpacePrefix = true;
    415     HasSpacePrefix.setPosition(position);
    416   }
    417   void setHasAlternativeForm(const char *position) {
    418     HasAlternativeForm = true;
    419     HasAlternativeForm.setPosition(position);
    420   }
    421   void setHasLeadingZeros(const char *position) {
    422     HasLeadingZeroes = true;
    423     HasLeadingZeroes.setPosition(position);
    424   }
    425   void setUsesPositionalArg() { UsesPositionalArg = true; }
    426 
    427     // Methods for querying the format specifier.
    428 
    429   const PrintfConversionSpecifier &getConversionSpecifier() const {
    430     return cast<PrintfConversionSpecifier>(CS);
    431   }
    432 
    433   void setPrecision(const OptionalAmount &Amt) {
    434     Precision = Amt;
    435     Precision.setUsesDotPrefix();
    436   }
    437 
    438   const OptionalAmount &getPrecision() const {
    439     return Precision;
    440   }
    441 
    442   bool consumesDataArgument() const {
    443     return getConversionSpecifier().consumesDataArgument();
    444   }
    445 
    446   /// \brief Returns the builtin type that a data argument
    447   /// paired with this format specifier should have.  This method
    448   /// will return null if the format specifier does not have
    449   /// a matching data argument or the matching argument matches
    450   /// more than one type.
    451   ArgTypeResult getArgType(ASTContext &Ctx) const;
    452 
    453   const OptionalFlag &hasThousandsGrouping() const {
    454       return HasThousandsGrouping;
    455   }
    456   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
    457   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
    458   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
    459   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
    460   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
    461   bool usesPositionalArg() const { return UsesPositionalArg; }
    462 
    463     /// Changes the specifier and length according to a QualType, retaining any
    464     /// flags or options. Returns true on success, or false when a conversion
    465     /// was not successful.
    466   bool fixType(QualType QT);
    467 
    468   void toString(llvm::raw_ostream &os) const;
    469 
    470     // Validation methods - to check if any element results in undefined behavior
    471   bool hasValidPlusPrefix() const;
    472   bool hasValidAlternativeForm() const;
    473   bool hasValidLeadingZeros() const;
    474   bool hasValidSpacePrefix() const;
    475   bool hasValidLeftJustified() const;
    476   bool hasValidThousandsGroupingPrefix() const;
    477 
    478   bool hasValidPrecision() const;
    479   bool hasValidFieldWidth() const;
    480 };
    481 }  // end analyze_printf namespace
    482 
    483 //===----------------------------------------------------------------------===//
    484 /// Pieces specific to fscanf format strings.
    485 
    486 namespace analyze_scanf {
    487 
    488 class ScanfConversionSpecifier :
    489     public analyze_format_string::ConversionSpecifier  {
    490 public:
    491   ScanfConversionSpecifier()
    492     : ConversionSpecifier(false, 0, InvalidSpecifier) {}
    493 
    494   ScanfConversionSpecifier(const char *pos, Kind k)
    495     : ConversionSpecifier(false, pos, k) {}
    496 
    497   void setEndScanList(const char *pos) { EndScanList = pos; }
    498 
    499   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
    500     return !CS->isPrintfKind();
    501   }
    502 };
    503 
    504 using analyze_format_string::LengthModifier;
    505 using analyze_format_string::OptionalAmount;
    506 using analyze_format_string::OptionalFlag;
    507 
    508 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
    509   OptionalFlag SuppressAssignment; // '*'
    510 public:
    511   ScanfSpecifier() :
    512     FormatSpecifier(/* isPrintf = */ false),
    513     SuppressAssignment("*") {}
    514 
    515   void setSuppressAssignment(const char *position) {
    516     SuppressAssignment = true;
    517     SuppressAssignment.setPosition(position);
    518   }
    519 
    520   const OptionalFlag &getSuppressAssignment() const {
    521     return SuppressAssignment;
    522   }
    523 
    524   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
    525     CS = cs;
    526   }
    527 
    528   const ScanfConversionSpecifier &getConversionSpecifier() const {
    529     return cast<ScanfConversionSpecifier>(CS);
    530   }
    531 
    532   bool consumesDataArgument() const {
    533     return CS.consumesDataArgument() && !SuppressAssignment;
    534   }
    535 
    536   static ScanfSpecifier Parse(const char *beg, const char *end);
    537 };
    538 
    539 } // end analyze_scanf namespace
    540 
    541 //===----------------------------------------------------------------------===//
    542 // Parsing and processing of format strings (both fprintf and fscanf).
    543 
    544 namespace analyze_format_string {
    545 
    546 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
    547 
    548 class FormatStringHandler {
    549 public:
    550   FormatStringHandler() {}
    551   virtual ~FormatStringHandler();
    552 
    553   virtual void HandleNullChar(const char *nullCharacter) {}
    554 
    555   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
    556                                      PositionContext p) {}
    557 
    558   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
    559 
    560   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
    561                                          unsigned specifierLen) {}
    562 
    563   // Printf-specific handlers.
    564 
    565   virtual bool HandleInvalidPrintfConversionSpecifier(
    566                                       const analyze_printf::PrintfSpecifier &FS,
    567                                       const char *startSpecifier,
    568                                       unsigned specifierLen) {
    569     return true;
    570   }
    571 
    572   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
    573                                      const char *startSpecifier,
    574                                      unsigned specifierLen) {
    575     return true;
    576   }
    577 
    578     // Scanf-specific handlers.
    579 
    580   virtual bool HandleInvalidScanfConversionSpecifier(
    581                                         const analyze_scanf::ScanfSpecifier &FS,
    582                                         const char *startSpecifier,
    583                                         unsigned specifierLen) {
    584     return true;
    585   }
    586 
    587   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
    588                                     const char *startSpecifier,
    589                                     unsigned specifierLen) {
    590     return true;
    591   }
    592 
    593   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
    594 };
    595 
    596 bool ParsePrintfString(FormatStringHandler &H,
    597                        const char *beg, const char *end);
    598 
    599 bool ParseScanfString(FormatStringHandler &H,
    600                        const char *beg, const char *end);
    601 
    602 } // end analyze_format_string namespace
    603 } // end clang namespace
    604 #endif
    605