1 //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the NumericLiteralParser, CharLiteralParser, and 11 // StringLiteralParser interfaces. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef CLANG_LITERALSUPPORT_H 16 #define CLANG_LITERALSUPPORT_H 17 18 #include "clang/Basic/CharInfo.h" 19 #include "clang/Basic/LLVM.h" 20 #include "clang/Basic/TokenKinds.h" 21 #include "llvm/ADT/APFloat.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/DataTypes.h" 25 26 namespace clang { 27 28 class DiagnosticsEngine; 29 class Preprocessor; 30 class Token; 31 class SourceLocation; 32 class TargetInfo; 33 class SourceManager; 34 class LangOptions; 35 36 /// NumericLiteralParser - This performs strict semantic analysis of the content 37 /// of a ppnumber, classifying it as either integer, floating, or erroneous, 38 /// determines the radix of the value and can convert it to a useful value. 39 class NumericLiteralParser { 40 Preprocessor &PP; // needed for diagnostics 41 42 const char *const ThisTokBegin; 43 const char *const ThisTokEnd; 44 const char *DigitsBegin, *SuffixBegin; // markers 45 const char *s; // cursor 46 47 unsigned radix; 48 49 bool saw_exponent, saw_period, saw_ud_suffix; 50 51 public: 52 NumericLiteralParser(StringRef TokSpelling, 53 SourceLocation TokLoc, 54 Preprocessor &PP); 55 bool hadError; 56 bool isUnsigned; 57 bool isLong; // This is *not* set for long long. 58 bool isLongLong; 59 bool isFloat; // 1.0f 60 bool isImaginary; // 1.0i 61 bool isMicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 62 63 bool isIntegerLiteral() const { 64 return !saw_period && !saw_exponent; 65 } 66 bool isFloatingLiteral() const { 67 return saw_period || saw_exponent; 68 } 69 70 bool hasUDSuffix() const { 71 return saw_ud_suffix; 72 } 73 StringRef getUDSuffix() const { 74 assert(saw_ud_suffix); 75 return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin); 76 } 77 unsigned getUDSuffixOffset() const { 78 assert(saw_ud_suffix); 79 return SuffixBegin - ThisTokBegin; 80 } 81 82 unsigned getRadix() const { return radix; } 83 84 /// GetIntegerValue - Convert this numeric literal value to an APInt that 85 /// matches Val's input width. If there is an overflow (i.e., if the unsigned 86 /// value read is larger than the APInt's bits will hold), set Val to the low 87 /// bits of the result and return true. Otherwise, return false. 88 bool GetIntegerValue(llvm::APInt &Val); 89 90 /// GetFloatValue - Convert this numeric literal to a floating value, using 91 /// the specified APFloat fltSemantics (specifying float, double, etc). 92 /// The optional bool isExact (passed-by-reference) has its value 93 /// set to true if the returned APFloat can represent the number in the 94 /// literal exactly, and false otherwise. 95 llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 96 97 private: 98 99 void ParseNumberStartingWithZero(SourceLocation TokLoc); 100 101 /// SkipHexDigits - Read and skip over any hex digits, up to End. 102 /// Return a pointer to the first non-hex digit or End. 103 const char *SkipHexDigits(const char *ptr) { 104 while (ptr != ThisTokEnd && isHexDigit(*ptr)) 105 ptr++; 106 return ptr; 107 } 108 109 /// SkipOctalDigits - Read and skip over any octal digits, up to End. 110 /// Return a pointer to the first non-hex digit or End. 111 const char *SkipOctalDigits(const char *ptr) { 112 while (ptr != ThisTokEnd && ((*ptr >= '0') && (*ptr <= '7'))) 113 ptr++; 114 return ptr; 115 } 116 117 /// SkipDigits - Read and skip over any digits, up to End. 118 /// Return a pointer to the first non-hex digit or End. 119 const char *SkipDigits(const char *ptr) { 120 while (ptr != ThisTokEnd && isDigit(*ptr)) 121 ptr++; 122 return ptr; 123 } 124 125 /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 126 /// Return a pointer to the first non-binary digit or End. 127 const char *SkipBinaryDigits(const char *ptr) { 128 while (ptr != ThisTokEnd && (*ptr == '0' || *ptr == '1')) 129 ptr++; 130 return ptr; 131 } 132 133 }; 134 135 /// CharLiteralParser - Perform interpretation and semantic analysis of a 136 /// character literal. 137 class CharLiteralParser { 138 uint64_t Value; 139 tok::TokenKind Kind; 140 bool IsMultiChar; 141 bool HadError; 142 SmallString<32> UDSuffixBuf; 143 unsigned UDSuffixOffset; 144 public: 145 CharLiteralParser(const char *begin, const char *end, 146 SourceLocation Loc, Preprocessor &PP, 147 tok::TokenKind kind); 148 149 bool hadError() const { return HadError; } 150 bool isAscii() const { return Kind == tok::char_constant; } 151 bool isWide() const { return Kind == tok::wide_char_constant; } 152 bool isUTF16() const { return Kind == tok::utf16_char_constant; } 153 bool isUTF32() const { return Kind == tok::utf32_char_constant; } 154 bool isMultiChar() const { return IsMultiChar; } 155 uint64_t getValue() const { return Value; } 156 StringRef getUDSuffix() const { return UDSuffixBuf; } 157 unsigned getUDSuffixOffset() const { 158 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 159 return UDSuffixOffset; 160 } 161 }; 162 163 /// StringLiteralParser - This decodes string escape characters and performs 164 /// wide string analysis and Translation Phase #6 (concatenation of string 165 /// literals) (C99 5.1.1.2p1). 166 class StringLiteralParser { 167 const SourceManager &SM; 168 const LangOptions &Features; 169 const TargetInfo &Target; 170 DiagnosticsEngine *Diags; 171 172 unsigned MaxTokenLength; 173 unsigned SizeBound; 174 unsigned CharByteWidth; 175 tok::TokenKind Kind; 176 SmallString<512> ResultBuf; 177 char *ResultPtr; // cursor 178 SmallString<32> UDSuffixBuf; 179 unsigned UDSuffixToken; 180 unsigned UDSuffixOffset; 181 public: 182 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 183 Preprocessor &PP, bool Complain = true); 184 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 185 const SourceManager &sm, const LangOptions &features, 186 const TargetInfo &target, DiagnosticsEngine *diags = 0) 187 : SM(sm), Features(features), Target(target), Diags(diags), 188 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 189 ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 190 init(StringToks, NumStringToks); 191 } 192 193 194 bool hadError; 195 bool Pascal; 196 197 StringRef GetString() const { 198 return StringRef(ResultBuf.data(), GetStringLength()); 199 } 200 unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 201 202 unsigned GetNumStringChars() const { 203 return GetStringLength() / CharByteWidth; 204 } 205 /// getOffsetOfStringByte - This function returns the offset of the 206 /// specified byte of the string data represented by Token. This handles 207 /// advancing over escape sequences in the string. 208 /// 209 /// If the Diagnostics pointer is non-null, then this will do semantic 210 /// checking of the string literal and emit errors and warnings. 211 unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 212 213 bool isAscii() const { return Kind == tok::string_literal; } 214 bool isWide() const { return Kind == tok::wide_string_literal; } 215 bool isUTF8() const { return Kind == tok::utf8_string_literal; } 216 bool isUTF16() const { return Kind == tok::utf16_string_literal; } 217 bool isUTF32() const { return Kind == tok::utf32_string_literal; } 218 bool isPascal() const { return Pascal; } 219 220 StringRef getUDSuffix() const { return UDSuffixBuf; } 221 222 /// Get the index of a token containing a ud-suffix. 223 unsigned getUDSuffixToken() const { 224 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 225 return UDSuffixToken; 226 } 227 /// Get the spelling offset of the first byte of the ud-suffix. 228 unsigned getUDSuffixOffset() const { 229 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 230 return UDSuffixOffset; 231 } 232 233 private: 234 void init(const Token *StringToks, unsigned NumStringToks); 235 bool CopyStringFragment(const Token &Tok, const char *TokBegin, 236 StringRef Fragment); 237 void DiagnoseLexingError(SourceLocation Loc); 238 }; 239 240 } // end namespace clang 241 242 #endif 243