1 //===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the TokenConcatenation class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Lex/TokenConcatenation.h" 15 #include "clang/Lex/Preprocessor.h" 16 #include "llvm/Support/ErrorHandling.h" 17 using namespace clang; 18 19 20 /// StartsWithL - Return true if the spelling of this token starts with 'L'. 21 bool TokenConcatenation::StartsWithL(const Token &Tok) const { 22 if (!Tok.needsCleaning()) { 23 SourceManager &SM = PP.getSourceManager(); 24 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L'; 25 } 26 27 if (Tok.getLength() < 256) { 28 char Buffer[256]; 29 const char *TokPtr = Buffer; 30 PP.getSpelling(Tok, TokPtr); 31 return TokPtr[0] == 'L'; 32 } 33 34 return PP.getSpelling(Tok)[0] == 'L'; 35 } 36 37 /// IsIdentifierL - Return true if the spelling of this token is literally 38 /// 'L'. 39 bool TokenConcatenation::IsIdentifierL(const Token &Tok) const { 40 if (!Tok.needsCleaning()) { 41 if (Tok.getLength() != 1) 42 return false; 43 SourceManager &SM = PP.getSourceManager(); 44 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L'; 45 } 46 47 if (Tok.getLength() < 256) { 48 char Buffer[256]; 49 const char *TokPtr = Buffer; 50 if (PP.getSpelling(Tok, TokPtr) != 1) 51 return false; 52 return TokPtr[0] == 'L'; 53 } 54 55 return PP.getSpelling(Tok) == "L"; 56 } 57 58 TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { 59 memset(TokenInfo, 0, sizeof(TokenInfo)); 60 61 // These tokens have custom code in AvoidConcat. 62 TokenInfo[tok::identifier ] |= aci_custom; 63 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar; 64 TokenInfo[tok::period ] |= aci_custom_firstchar; 65 TokenInfo[tok::amp ] |= aci_custom_firstchar; 66 TokenInfo[tok::plus ] |= aci_custom_firstchar; 67 TokenInfo[tok::minus ] |= aci_custom_firstchar; 68 TokenInfo[tok::slash ] |= aci_custom_firstchar; 69 TokenInfo[tok::less ] |= aci_custom_firstchar; 70 TokenInfo[tok::greater ] |= aci_custom_firstchar; 71 TokenInfo[tok::pipe ] |= aci_custom_firstchar; 72 TokenInfo[tok::percent ] |= aci_custom_firstchar; 73 TokenInfo[tok::colon ] |= aci_custom_firstchar; 74 TokenInfo[tok::hash ] |= aci_custom_firstchar; 75 TokenInfo[tok::arrow ] |= aci_custom_firstchar; 76 77 // These tokens change behavior if followed by an '='. 78 TokenInfo[tok::amp ] |= aci_avoid_equal; // &= 79 TokenInfo[tok::plus ] |= aci_avoid_equal; // += 80 TokenInfo[tok::minus ] |= aci_avoid_equal; // -= 81 TokenInfo[tok::slash ] |= aci_avoid_equal; // /= 82 TokenInfo[tok::less ] |= aci_avoid_equal; // <= 83 TokenInfo[tok::greater ] |= aci_avoid_equal; // >= 84 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |= 85 TokenInfo[tok::percent ] |= aci_avoid_equal; // %= 86 TokenInfo[tok::star ] |= aci_avoid_equal; // *= 87 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // != 88 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<= 89 TokenInfo[tok::greatergreater] |= aci_avoid_equal; // >>= 90 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^= 91 TokenInfo[tok::equal ] |= aci_avoid_equal; // == 92 } 93 94 /// GetFirstChar - Get the first character of the token \arg Tok, 95 /// avoiding calls to getSpelling where possible. 96 static char GetFirstChar(Preprocessor &PP, const Token &Tok) { 97 if (IdentifierInfo *II = Tok.getIdentifierInfo()) { 98 // Avoid spelling identifiers, the most common form of token. 99 return II->getNameStart()[0]; 100 } else if (!Tok.needsCleaning()) { 101 if (Tok.isLiteral() && Tok.getLiteralData()) { 102 return *Tok.getLiteralData(); 103 } else { 104 SourceManager &SM = PP.getSourceManager(); 105 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())); 106 } 107 } else if (Tok.getLength() < 256) { 108 char Buffer[256]; 109 const char *TokPtr = Buffer; 110 PP.getSpelling(Tok, TokPtr); 111 return TokPtr[0]; 112 } else { 113 return PP.getSpelling(Tok)[0]; 114 } 115 } 116 117 /// AvoidConcat - If printing PrevTok immediately followed by Tok would cause 118 /// the two individual tokens to be lexed as a single token, return true 119 /// (which causes a space to be printed between them). This allows the output 120 /// of -E mode to be lexed to the same token stream as lexing the input 121 /// directly would. 122 /// 123 /// This code must conservatively return true if it doesn't want to be 100% 124 /// accurate. This will cause the output to include extra space characters, 125 /// but the resulting output won't have incorrect concatenations going on. 126 /// Examples include "..", which we print with a space between, because we 127 /// don't want to track enough to tell "x.." from "...". 128 bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, 129 const Token &PrevTok, 130 const Token &Tok) const { 131 // First, check to see if the tokens were directly adjacent in the original 132 // source. If they were, it must be okay to stick them together: if there 133 // were an issue, the tokens would have been lexed differently. 134 if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() && 135 PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) == 136 Tok.getLocation()) 137 return false; 138 139 tok::TokenKind PrevKind = PrevTok.getKind(); 140 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator. 141 PrevKind = tok::identifier; 142 143 // Look up information on when we should avoid concatenation with prevtok. 144 unsigned ConcatInfo = TokenInfo[PrevKind]; 145 146 // If prevtok never causes a problem for anything after it, return quickly. 147 if (ConcatInfo == 0) return false; 148 149 if (ConcatInfo & aci_avoid_equal) { 150 // If the next token is '=' or '==', avoid concatenation. 151 if (Tok.is(tok::equal) || Tok.is(tok::equalequal)) 152 return true; 153 ConcatInfo &= ~aci_avoid_equal; 154 } 155 156 if (ConcatInfo == 0) return false; 157 158 // Basic algorithm: we look at the first character of the second token, and 159 // determine whether it, if appended to the first token, would form (or 160 // would contribute) to a larger token if concatenated. 161 char FirstChar = 0; 162 if (ConcatInfo & aci_custom) { 163 // If the token does not need to know the first character, don't get it. 164 } else { 165 FirstChar = GetFirstChar(PP, Tok); 166 } 167 168 switch (PrevKind) { 169 default: 170 llvm_unreachable("InitAvoidConcatTokenInfo built wrong"); 171 return true; 172 173 case tok::raw_identifier: 174 llvm_unreachable("tok::raw_identifier in non-raw lexing mode!"); 175 return true; 176 177 case tok::identifier: // id+id or id+number or id+L"foo". 178 // id+'.'... will not append. 179 if (Tok.is(tok::numeric_constant)) 180 return GetFirstChar(PP, Tok) != '.'; 181 182 if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* || 183 Tok.is(tok::wide_char_literal)*/) 184 return true; 185 186 // If this isn't identifier + string, we're done. 187 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal)) 188 return false; 189 190 // FIXME: need a wide_char_constant! 191 192 // If the string was a wide string L"foo" or wide char L'f', it would 193 // concat with the previous identifier into fooL"bar". Avoid this. 194 if (StartsWithL(Tok)) 195 return true; 196 197 // Otherwise, this is a narrow character or string. If the *identifier* 198 // is a literal 'L', avoid pasting L "foo" -> L"foo". 199 return IsIdentifierL(PrevTok); 200 case tok::numeric_constant: 201 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) || 202 FirstChar == '+' || FirstChar == '-' || FirstChar == '.'; 203 case tok::period: // ..., .*, .1234 204 return (FirstChar == '.' && PrevPrevTok.is(tok::period)) || 205 isdigit(FirstChar) || 206 (PP.getLangOptions().CPlusPlus && FirstChar == '*'); 207 case tok::amp: // && 208 return FirstChar == '&'; 209 case tok::plus: // ++ 210 return FirstChar == '+'; 211 case tok::minus: // --, ->, ->* 212 return FirstChar == '-' || FirstChar == '>'; 213 case tok::slash: //, /*, // 214 return FirstChar == '*' || FirstChar == '/'; 215 case tok::less: // <<, <<=, <:, <% 216 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%'; 217 case tok::greater: // >>, >>= 218 return FirstChar == '>'; 219 case tok::pipe: // || 220 return FirstChar == '|'; 221 case tok::percent: // %>, %: 222 return FirstChar == '>' || FirstChar == ':'; 223 case tok::colon: // ::, :> 224 return FirstChar == '>' || 225 (PP.getLangOptions().CPlusPlus && FirstChar == ':'); 226 case tok::hash: // ##, #@, %:%: 227 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%'; 228 case tok::arrow: // ->* 229 return PP.getLangOptions().CPlusPlus && FirstChar == '*'; 230 } 231 } 232