Home | History | Annotate | Download | only in Analysis
      1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Handling of format strings in scanf and friends.  The structure of format
     11 // strings for fscanf() is described in C99 7.19.6.2.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "clang/Analysis/Analyses/FormatString.h"
     16 #include "FormatStringParsing.h"
     17 
     18 using clang::analyze_format_string::ArgTypeResult;
     19 using clang::analyze_format_string::FormatStringHandler;
     20 using clang::analyze_format_string::LengthModifier;
     21 using clang::analyze_format_string::OptionalAmount;
     22 using clang::analyze_format_string::ConversionSpecifier;
     23 using clang::analyze_scanf::ScanfConversionSpecifier;
     24 using clang::analyze_scanf::ScanfSpecifier;
     25 using clang::UpdateOnReturn;
     26 
     27 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
     28         ScanfSpecifierResult;
     29 
     30 static bool ParseScanList(FormatStringHandler &H,
     31                           ScanfConversionSpecifier &CS,
     32                           const char *&Beg, const char *E) {
     33   const char *I = Beg;
     34   const char *start = I - 1;
     35   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     36 
     37   // No more characters?
     38   if (I == E) {
     39     H.HandleIncompleteScanList(start, I);
     40     return true;
     41   }
     42 
     43   // Special case: ']' is the first character.
     44   if (*I == ']') {
     45     if (++I == E) {
     46       H.HandleIncompleteScanList(start, I - 1);
     47       return true;
     48     }
     49   }
     50 
     51   // Look for a ']' character which denotes the end of the scan list.
     52   while (*I != ']') {
     53     if (++I == E) {
     54       H.HandleIncompleteScanList(start, I - 1);
     55       return true;
     56     }
     57   }
     58 
     59   CS.setEndScanList(I);
     60   return false;
     61 }
     62 
     63 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
     64 // We can possibly refactor.
     65 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
     66                                                 const char *&Beg,
     67                                                 const char *E,
     68                                                 unsigned &argIndex) {
     69 
     70   using namespace clang::analyze_scanf;
     71   const char *I = Beg;
     72   const char *Start = 0;
     73   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
     74 
     75     // Look for a '%' character that indicates the start of a format specifier.
     76   for ( ; I != E ; ++I) {
     77     char c = *I;
     78     if (c == '\0') {
     79         // Detect spurious null characters, which are likely errors.
     80       H.HandleNullChar(I);
     81       return true;
     82     }
     83     if (c == '%') {
     84       Start = I++;  // Record the start of the format specifier.
     85       break;
     86     }
     87   }
     88 
     89     // No format specifier found?
     90   if (!Start)
     91     return false;
     92 
     93   if (I == E) {
     94       // No more characters left?
     95     H.HandleIncompleteSpecifier(Start, E - Start);
     96     return true;
     97   }
     98 
     99   ScanfSpecifier FS;
    100   if (ParseArgPosition(H, FS, Start, I, E))
    101     return true;
    102 
    103   if (I == E) {
    104       // No more characters left?
    105     H.HandleIncompleteSpecifier(Start, E - Start);
    106     return true;
    107   }
    108 
    109   // Look for '*' flag if it is present.
    110   if (*I == '*') {
    111     FS.setSuppressAssignment(I);
    112     if (++I == E) {
    113       H.HandleIncompleteSpecifier(Start, E - Start);
    114       return true;
    115     }
    116   }
    117 
    118   // Look for the field width (if any).  Unlike printf, this is either
    119   // a fixed integer or isn't present.
    120   const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
    121   if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
    122     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
    123     FS.setFieldWidth(Amt);
    124 
    125     if (I == E) {
    126       // No more characters left?
    127       H.HandleIncompleteSpecifier(Start, E - Start);
    128       return true;
    129     }
    130   }
    131 
    132   // Look for the length modifier.
    133   if (ParseLengthModifier(FS, I, E) && I == E) {
    134       // No more characters left?
    135     H.HandleIncompleteSpecifier(Start, E - Start);
    136     return true;
    137   }
    138 
    139   // Detect spurious null characters, which are likely errors.
    140   if (*I == '\0') {
    141     H.HandleNullChar(I);
    142     return true;
    143   }
    144 
    145   // Finally, look for the conversion specifier.
    146   const char *conversionPosition = I++;
    147   ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
    148   switch (*conversionPosition) {
    149     default:
    150       break;
    151     case '%': k = ConversionSpecifier::PercentArg;   break;
    152     case 'A': k = ConversionSpecifier::AArg; break;
    153     case 'E': k = ConversionSpecifier::EArg; break;
    154     case 'F': k = ConversionSpecifier::FArg; break;
    155     case 'G': k = ConversionSpecifier::GArg; break;
    156     case 'X': k = ConversionSpecifier::XArg; break;
    157     case 'a': k = ConversionSpecifier::aArg; break;
    158     case 'd': k = ConversionSpecifier::dArg; break;
    159     case 'e': k = ConversionSpecifier::eArg; break;
    160     case 'f': k = ConversionSpecifier::fArg; break;
    161     case 'g': k = ConversionSpecifier::gArg; break;
    162     case 'i': k = ConversionSpecifier::iArg; break;
    163     case 'n': k = ConversionSpecifier::nArg; break;
    164     case 'c': k = ConversionSpecifier::cArg; break;
    165     case 'C': k = ConversionSpecifier::CArg; break;
    166     case 'S': k = ConversionSpecifier::SArg; break;
    167     case '[': k = ConversionSpecifier::ScanListArg; break;
    168     case 'u': k = ConversionSpecifier::uArg; break;
    169     case 'x': k = ConversionSpecifier::xArg; break;
    170     case 'o': k = ConversionSpecifier::oArg; break;
    171     case 's': k = ConversionSpecifier::sArg; break;
    172     case 'p': k = ConversionSpecifier::pArg; break;
    173   }
    174   ScanfConversionSpecifier CS(conversionPosition, k);
    175   if (k == ScanfConversionSpecifier::ScanListArg) {
    176     if (!ParseScanList(H, CS, I, E))
    177       return true;
    178   }
    179   FS.setConversionSpecifier(CS);
    180   if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
    181       && !FS.usesPositionalArg())
    182     FS.setArgIndex(argIndex++);
    183 
    184   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
    185   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
    186 
    187   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
    188     // Assume the conversion takes one argument.
    189     return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
    190   }
    191   return ScanfSpecifierResult(Start, FS);
    192 }
    193 
    194 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
    195                                                     const char *I,
    196                                                     const char *E) {
    197 
    198   unsigned argIndex = 0;
    199 
    200   // Keep looking for a format specifier until we have exhausted the string.
    201   while (I != E) {
    202     const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex);
    203     // Did a fail-stop error of any kind occur when parsing the specifier?
    204     // If so, don't do any more processing.
    205     if (FSR.shouldStop())
    206       return true;;
    207       // Did we exhaust the string or encounter an error that
    208       // we can recover from?
    209     if (!FSR.hasValue())
    210       continue;
    211       // We have a format specifier.  Pass it to the callback.
    212     if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
    213                                 I - FSR.getStart())) {
    214       return true;
    215     }
    216   }
    217   assert(I == E && "Format string not exhausted");
    218   return false;
    219 }
    220 
    221 
    222