1 //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the NumericLiteralParser, CharLiteralParser, and 11 // StringLiteralParser interfaces. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef CLANG_LITERALSUPPORT_H 16 #define CLANG_LITERALSUPPORT_H 17 18 #include "clang/Basic/CharInfo.h" 19 #include "clang/Basic/LLVM.h" 20 #include "clang/Basic/TokenKinds.h" 21 #include "llvm/ADT/APFloat.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/DataTypes.h" 25 26 namespace clang { 27 28 class DiagnosticsEngine; 29 class Preprocessor; 30 class Token; 31 class SourceLocation; 32 class TargetInfo; 33 class SourceManager; 34 class LangOptions; 35 36 /// NumericLiteralParser - This performs strict semantic analysis of the content 37 /// of a ppnumber, classifying it as either integer, floating, or erroneous, 38 /// determines the radix of the value and can convert it to a useful value. 39 class NumericLiteralParser { 40 Preprocessor &PP; // needed for diagnostics 41 42 const char *const ThisTokBegin; 43 const char *const ThisTokEnd; 44 const char *DigitsBegin, *SuffixBegin; // markers 45 const char *s; // cursor 46 47 unsigned radix; 48 49 bool saw_exponent, saw_period, saw_ud_suffix; 50 51 public: 52 NumericLiteralParser(StringRef TokSpelling, 53 SourceLocation TokLoc, 54 Preprocessor &PP); 55 bool hadError; 56 bool isUnsigned; 57 bool isLong; // This is *not* set for long long. 58 bool isLongLong; 59 bool isFloat; // 1.0f 60 bool isImaginary; // 1.0i 61 bool isMicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 62 63 bool isIntegerLiteral() const { 64 return !saw_period && !saw_exponent; 65 } 66 bool isFloatingLiteral() const { 67 return saw_period || saw_exponent; 68 } 69 70 bool hasUDSuffix() const { 71 return saw_ud_suffix; 72 } 73 StringRef getUDSuffix() const { 74 assert(saw_ud_suffix); 75 return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin); 76 } 77 unsigned getUDSuffixOffset() const { 78 assert(saw_ud_suffix); 79 return SuffixBegin - ThisTokBegin; 80 } 81 82 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 83 84 unsigned getRadix() const { return radix; } 85 86 /// GetIntegerValue - Convert this numeric literal value to an APInt that 87 /// matches Val's input width. If there is an overflow (i.e., if the unsigned 88 /// value read is larger than the APInt's bits will hold), set Val to the low 89 /// bits of the result and return true. Otherwise, return false. 90 bool GetIntegerValue(llvm::APInt &Val); 91 92 /// GetFloatValue - Convert this numeric literal to a floating value, using 93 /// the specified APFloat fltSemantics (specifying float, double, etc). 94 /// The optional bool isExact (passed-by-reference) has its value 95 /// set to true if the returned APFloat can represent the number in the 96 /// literal exactly, and false otherwise. 97 llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 98 99 private: 100 101 void ParseNumberStartingWithZero(SourceLocation TokLoc); 102 103 /// SkipHexDigits - Read and skip over any hex digits, up to End. 104 /// Return a pointer to the first non-hex digit or End. 105 const char *SkipHexDigits(const char *ptr) { 106 while (ptr != ThisTokEnd && isHexDigit(*ptr)) 107 ptr++; 108 return ptr; 109 } 110 111 /// SkipOctalDigits - Read and skip over any octal digits, up to End. 112 /// Return a pointer to the first non-hex digit or End. 113 const char *SkipOctalDigits(const char *ptr) { 114 while (ptr != ThisTokEnd && ((*ptr >= '0') && (*ptr <= '7'))) 115 ptr++; 116 return ptr; 117 } 118 119 /// SkipDigits - Read and skip over any digits, up to End. 120 /// Return a pointer to the first non-hex digit or End. 121 const char *SkipDigits(const char *ptr) { 122 while (ptr != ThisTokEnd && isDigit(*ptr)) 123 ptr++; 124 return ptr; 125 } 126 127 /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 128 /// Return a pointer to the first non-binary digit or End. 129 const char *SkipBinaryDigits(const char *ptr) { 130 while (ptr != ThisTokEnd && (*ptr == '0' || *ptr == '1')) 131 ptr++; 132 return ptr; 133 } 134 135 }; 136 137 /// CharLiteralParser - Perform interpretation and semantic analysis of a 138 /// character literal. 139 class CharLiteralParser { 140 uint64_t Value; 141 tok::TokenKind Kind; 142 bool IsMultiChar; 143 bool HadError; 144 SmallString<32> UDSuffixBuf; 145 unsigned UDSuffixOffset; 146 public: 147 CharLiteralParser(const char *begin, const char *end, 148 SourceLocation Loc, Preprocessor &PP, 149 tok::TokenKind kind); 150 151 bool hadError() const { return HadError; } 152 bool isAscii() const { return Kind == tok::char_constant; } 153 bool isWide() const { return Kind == tok::wide_char_constant; } 154 bool isUTF16() const { return Kind == tok::utf16_char_constant; } 155 bool isUTF32() const { return Kind == tok::utf32_char_constant; } 156 bool isMultiChar() const { return IsMultiChar; } 157 uint64_t getValue() const { return Value; } 158 StringRef getUDSuffix() const { return UDSuffixBuf; } 159 unsigned getUDSuffixOffset() const { 160 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 161 return UDSuffixOffset; 162 } 163 }; 164 165 /// StringLiteralParser - This decodes string escape characters and performs 166 /// wide string analysis and Translation Phase #6 (concatenation of string 167 /// literals) (C99 5.1.1.2p1). 168 class StringLiteralParser { 169 const SourceManager &SM; 170 const LangOptions &Features; 171 const TargetInfo &Target; 172 DiagnosticsEngine *Diags; 173 174 unsigned MaxTokenLength; 175 unsigned SizeBound; 176 unsigned CharByteWidth; 177 tok::TokenKind Kind; 178 SmallString<512> ResultBuf; 179 char *ResultPtr; // cursor 180 SmallString<32> UDSuffixBuf; 181 unsigned UDSuffixToken; 182 unsigned UDSuffixOffset; 183 public: 184 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 185 Preprocessor &PP, bool Complain = true); 186 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 187 const SourceManager &sm, const LangOptions &features, 188 const TargetInfo &target, DiagnosticsEngine *diags = 0) 189 : SM(sm), Features(features), Target(target), Diags(diags), 190 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 191 ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 192 init(StringToks, NumStringToks); 193 } 194 195 196 bool hadError; 197 bool Pascal; 198 199 StringRef GetString() const { 200 return StringRef(ResultBuf.data(), GetStringLength()); 201 } 202 unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 203 204 unsigned GetNumStringChars() const { 205 return GetStringLength() / CharByteWidth; 206 } 207 /// getOffsetOfStringByte - This function returns the offset of the 208 /// specified byte of the string data represented by Token. This handles 209 /// advancing over escape sequences in the string. 210 /// 211 /// If the Diagnostics pointer is non-null, then this will do semantic 212 /// checking of the string literal and emit errors and warnings. 213 unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 214 215 bool isAscii() const { return Kind == tok::string_literal; } 216 bool isWide() const { return Kind == tok::wide_string_literal; } 217 bool isUTF8() const { return Kind == tok::utf8_string_literal; } 218 bool isUTF16() const { return Kind == tok::utf16_string_literal; } 219 bool isUTF32() const { return Kind == tok::utf32_string_literal; } 220 bool isPascal() const { return Pascal; } 221 222 StringRef getUDSuffix() const { return UDSuffixBuf; } 223 224 /// Get the index of a token containing a ud-suffix. 225 unsigned getUDSuffixToken() const { 226 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 227 return UDSuffixToken; 228 } 229 /// Get the spelling offset of the first byte of the ud-suffix. 230 unsigned getUDSuffixOffset() const { 231 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 232 return UDSuffixOffset; 233 } 234 235 private: 236 void init(const Token *StringToks, unsigned NumStringToks); 237 bool CopyStringFragment(const Token &Tok, const char *TokBegin, 238 StringRef Fragment); 239 void DiagnoseLexingError(SourceLocation Loc); 240 }; 241 242 } // end namespace clang 243 244 #endif 245