Home | History | Annotate | Download | only in AsmParser
      1 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Implement the Lexer for .ll files.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "LLLexer.h"
     15 #include "llvm/DerivedTypes.h"
     16 #include "llvm/Instruction.h"
     17 #include "llvm/LLVMContext.h"
     18 #include "llvm/ADT/Twine.h"
     19 #include "llvm/Assembly/Parser.h"
     20 #include "llvm/Support/ErrorHandling.h"
     21 #include "llvm/Support/MemoryBuffer.h"
     22 #include "llvm/Support/MathExtras.h"
     23 #include "llvm/Support/SourceMgr.h"
     24 #include "llvm/Support/raw_ostream.h"
     25 #include <cctype>
     26 #include <cstdio>
     27 #include <cstdlib>
     28 #include <cstring>
     29 using namespace llvm;
     30 
/// Error - Build an "error" diagnostic for ErrorLoc with the text Msg via the
/// source manager and store it in ErrorInfo.  Always returns true, so a caller
/// can write "return Error(...)" to signal failure.
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
  ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
  return true;
}
     35 
     36 //===----------------------------------------------------------------------===//
     37 // Helper functions.
     38 //===----------------------------------------------------------------------===//
     39 
     40 // atoull - Convert an ascii string of decimal digits into the unsigned long
     41 // long representation... this does not have to do input error checking,
     42 // because we know that the input will be matched by a suitable regex...
     43 //
     44 uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
     45   uint64_t Result = 0;
     46   for (; Buffer != End; Buffer++) {
     47     uint64_t OldRes = Result;
     48     Result *= 10;
     49     Result += *Buffer-'0';
     50     if (Result < OldRes) {  // Uh, oh, overflow detected!!!
     51       Error("constant bigger than 64 bits detected!");
     52       return 0;
     53     }
     54   }
     55   return Result;
     56 }
     57 
     58 uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
     59   uint64_t Result = 0;
     60   for (; Buffer != End; ++Buffer) {
     61     uint64_t OldRes = Result;
     62     Result *= 16;
     63     char C = *Buffer;
     64     if (C >= '0' && C <= '9')
     65       Result += C-'0';
     66     else if (C >= 'A' && C <= 'F')
     67       Result += C-'A'+10;
     68     else if (C >= 'a' && C <= 'f')
     69       Result += C-'a'+10;
     70 
     71     if (Result < OldRes) {   // Uh, oh, overflow detected!!!
     72       Error("constant bigger than 64 bits detected!");
     73       return 0;
     74     }
     75   }
     76   return Result;
     77 }
     78 
     79 void LLLexer::HexToIntPair(const char *Buffer, const char *End,
     80                            uint64_t Pair[2]) {
     81   Pair[0] = 0;
     82   for (int i=0; i<16; i++, Buffer++) {
     83     assert(Buffer != End);
     84     Pair[0] *= 16;
     85     char C = *Buffer;
     86     if (C >= '0' && C <= '9')
     87       Pair[0] += C-'0';
     88     else if (C >= 'A' && C <= 'F')
     89       Pair[0] += C-'A'+10;
     90     else if (C >= 'a' && C <= 'f')
     91       Pair[0] += C-'a'+10;
     92   }
     93   Pair[1] = 0;
     94   for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
     95     Pair[1] *= 16;
     96     char C = *Buffer;
     97     if (C >= '0' && C <= '9')
     98       Pair[1] += C-'0';
     99     else if (C >= 'A' && C <= 'F')
    100       Pair[1] += C-'A'+10;
    101     else if (C >= 'a' && C <= 'f')
    102       Pair[1] += C-'a'+10;
    103   }
    104   if (Buffer != End)
    105     Error("constant bigger than 128 bits detected!");
    106 }
    107 
    108 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
    109 /// { low64, high16 } as usual for an APInt.
    110 void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
    111                            uint64_t Pair[2]) {
    112   Pair[1] = 0;
    113   for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
    114     assert(Buffer != End);
    115     Pair[1] *= 16;
    116     char C = *Buffer;
    117     if (C >= '0' && C <= '9')
    118       Pair[1] += C-'0';
    119     else if (C >= 'A' && C <= 'F')
    120       Pair[1] += C-'A'+10;
    121     else if (C >= 'a' && C <= 'f')
    122       Pair[1] += C-'a'+10;
    123   }
    124   Pair[0] = 0;
    125   for (int i=0; i<16; i++, Buffer++) {
    126     Pair[0] *= 16;
    127     char C = *Buffer;
    128     if (C >= '0' && C <= '9')
    129       Pair[0] += C-'0';
    130     else if (C >= 'A' && C <= 'F')
    131       Pair[0] += C-'A'+10;
    132     else if (C >= 'a' && C <= 'f')
    133       Pair[0] += C-'a'+10;
    134   }
    135   if (Buffer != End)
    136     Error("constant bigger than 128 bits detected!");
    137 }
    138 
    139 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
    140 // appropriate character.
    141 static void UnEscapeLexed(std::string &Str) {
    142   if (Str.empty()) return;
    143 
    144   char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
    145   char *BOut = Buffer;
    146   for (char *BIn = Buffer; BIn != EndBuffer; ) {
    147     if (BIn[0] == '\\') {
    148       if (BIn < EndBuffer-1 && BIn[1] == '\\') {
    149         *BOut++ = '\\'; // Two \ becomes one
    150         BIn += 2;
    151       } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
    152         char Tmp = BIn[3]; BIn[3] = 0;      // Terminate string
    153         *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
    154         BIn[3] = Tmp;                       // Restore character
    155         BIn += 3;                           // Skip over handled chars
    156         ++BOut;
    157       } else {
    158         *BOut++ = *BIn++;
    159       }
    160     } else {
    161       *BOut++ = *BIn++;
    162     }
    163   }
    164   Str.resize(BOut-Buffer);
    165 }
    166 
    167 /// isLabelChar - Return true for [-a-zA-Z$._0-9].
    168 static bool isLabelChar(char C) {
    169   return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
    170 }
    171 
    172 
    173 /// isLabelTail - Return true if this pointer points to a valid end of a label.
    174 static const char *isLabelTail(const char *CurPtr) {
    175   while (1) {
    176     if (CurPtr[0] == ':') return CurPtr+1;
    177     if (!isLabelChar(CurPtr[0])) return 0;
    178     ++CurPtr;
    179   }
    180 }
    181 
    182 
    183 
    184 //===----------------------------------------------------------------------===//
    185 // Lexer definition.
    186 //===----------------------------------------------------------------------===//
    187 
    188 LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
    189                  LLVMContext &C)
    190   : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
    191   CurPtr = CurBuf->getBufferStart();
    192 }
    193 
/// getFilename - Return the identifier of the buffer currently being lexed,
/// as reported by the buffer itself.
std::string LLLexer::getFilename() const {
  return CurBuf->getBufferIdentifier();
}
    197 
    198 int LLLexer::getNextChar() {
    199   char CurChar = *CurPtr++;
    200   switch (CurChar) {
    201   default: return (unsigned char)CurChar;
    202   case 0:
    203     // A nul character in the stream is either the end of the current buffer or
    204     // a random nul in the file.  Disambiguate that here.
    205     if (CurPtr-1 != CurBuf->getBufferEnd())
    206       return 0;  // Just whitespace.
    207 
    208     // Otherwise, return end of file.
    209     --CurPtr;  // Another call to lex will return EOF again.
    210     return EOF;
    211   }
    212 }
    213 
    214 
    215 lltok::Kind LLLexer::LexToken() {
    216   TokStart = CurPtr;
    217 
    218   int CurChar = getNextChar();
    219   switch (CurChar) {
    220   default:
    221     // Handle letters: [a-zA-Z_]
    222     if (isalpha(CurChar) || CurChar == '_')
    223       return LexIdentifier();
    224 
    225     return lltok::Error;
    226   case EOF: return lltok::Eof;
    227   case 0:
    228   case ' ':
    229   case '\t':
    230   case '\n':
    231   case '\r':
    232     // Ignore whitespace.
    233     return LexToken();
    234   case '+': return LexPositive();
    235   case '@': return LexAt();
    236   case '%': return LexPercent();
    237   case '"': return LexQuote();
    238   case '.':
    239     if (const char *Ptr = isLabelTail(CurPtr)) {
    240       CurPtr = Ptr;
    241       StrVal.assign(TokStart, CurPtr-1);
    242       return lltok::LabelStr;
    243     }
    244     if (CurPtr[0] == '.' && CurPtr[1] == '.') {
    245       CurPtr += 2;
    246       return lltok::dotdotdot;
    247     }
    248     return lltok::Error;
    249   case '$':
    250     if (const char *Ptr = isLabelTail(CurPtr)) {
    251       CurPtr = Ptr;
    252       StrVal.assign(TokStart, CurPtr-1);
    253       return lltok::LabelStr;
    254     }
    255     return lltok::Error;
    256   case ';':
    257     SkipLineComment();
    258     return LexToken();
    259   case '!': return LexExclaim();
    260   case '0': case '1': case '2': case '3': case '4':
    261   case '5': case '6': case '7': case '8': case '9':
    262   case '-':
    263     return LexDigitOrNegative();
    264   case '=': return lltok::equal;
    265   case '[': return lltok::lsquare;
    266   case ']': return lltok::rsquare;
    267   case '{': return lltok::lbrace;
    268   case '}': return lltok::rbrace;
    269   case '<': return lltok::less;
    270   case '>': return lltok::greater;
    271   case '(': return lltok::lparen;
    272   case ')': return lltok::rparen;
    273   case ',': return lltok::comma;
    274   case '*': return lltok::star;
    275   case '\\': return lltok::backslash;
    276   }
    277 }
    278 
    279 void LLLexer::SkipLineComment() {
    280   while (1) {
    281     if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
    282       return;
    283   }
    284 }
    285 
    286 /// LexAt - Lex all tokens that start with an @ character:
    287 ///   GlobalVar   @\"[^\"]*\"
    288 ///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
    289 ///   GlobalVarID @[0-9]+
    290 lltok::Kind LLLexer::LexAt() {
    291   // Handle AtStringConstant: @\"[^\"]*\"
    292   if (CurPtr[0] == '"') {
    293     ++CurPtr;
    294 
    295     while (1) {
    296       int CurChar = getNextChar();
    297 
    298       if (CurChar == EOF) {
    299         Error("end of file in global variable name");
    300         return lltok::Error;
    301       }
    302       if (CurChar == '"') {
    303         StrVal.assign(TokStart+2, CurPtr-1);
    304         UnEscapeLexed(StrVal);
    305         return lltok::GlobalVar;
    306       }
    307     }
    308   }
    309 
    310   // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
    311   if (ReadVarName())
    312     return lltok::GlobalVar;
    313 
    314   // Handle GlobalVarID: @[0-9]+
    315   if (isdigit(CurPtr[0])) {
    316     for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
    317       /*empty*/;
    318 
    319     uint64_t Val = atoull(TokStart+1, CurPtr);
    320     if ((unsigned)Val != Val)
    321       Error("invalid value number (too large)!");
    322     UIntVal = unsigned(Val);
    323     return lltok::GlobalID;
    324   }
    325 
    326   return lltok::Error;
    327 }
    328 
    329 /// ReadString - Read a string until the closing quote.
    330 lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
    331   const char *Start = CurPtr;
    332   while (1) {
    333     int CurChar = getNextChar();
    334 
    335     if (CurChar == EOF) {
    336       Error("end of file in string constant");
    337       return lltok::Error;
    338     }
    339     if (CurChar == '"') {
    340       StrVal.assign(Start, CurPtr-1);
    341       UnEscapeLexed(StrVal);
    342       return kind;
    343     }
    344   }
    345 }
    346 
    347 /// ReadVarName - Read the rest of a token containing a variable name.
    348 bool LLLexer::ReadVarName() {
    349   const char *NameStart = CurPtr;
    350   if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
    351       CurPtr[0] == '.' || CurPtr[0] == '_') {
    352     ++CurPtr;
    353     while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
    354            CurPtr[0] == '.' || CurPtr[0] == '_')
    355       ++CurPtr;
    356 
    357     StrVal.assign(NameStart, CurPtr);
    358     return true;
    359   }
    360   return false;
    361 }
    362 
    363 /// LexPercent - Lex all tokens that start with a % character:
    364 ///   LocalVar   ::= %\"[^\"]*\"
    365 ///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
    366 ///   LocalVarID ::= %[0-9]+
    367 lltok::Kind LLLexer::LexPercent() {
    368   // Handle LocalVarName: %\"[^\"]*\"
    369   if (CurPtr[0] == '"') {
    370     ++CurPtr;
    371     return ReadString(lltok::LocalVar);
    372   }
    373 
    374   // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
    375   if (ReadVarName())
    376     return lltok::LocalVar;
    377 
    378   // Handle LocalVarID: %[0-9]+
    379   if (isdigit(CurPtr[0])) {
    380     for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
    381       /*empty*/;
    382 
    383     uint64_t Val = atoull(TokStart+1, CurPtr);
    384     if ((unsigned)Val != Val)
    385       Error("invalid value number (too large)!");
    386     UIntVal = unsigned(Val);
    387     return lltok::LocalVarID;
    388   }
    389 
    390   return lltok::Error;
    391 }
    392 
    393 /// LexQuote - Lex all tokens that start with a " character:
    394 ///   QuoteLabel        "[^"]+":
    395 ///   StringConstant    "[^"]*"
    396 lltok::Kind LLLexer::LexQuote() {
    397   lltok::Kind kind = ReadString(lltok::StringConstant);
    398   if (kind == lltok::Error || kind == lltok::Eof)
    399     return kind;
    400 
    401   if (CurPtr[0] == ':') {
    402     ++CurPtr;
    403     kind = lltok::LabelStr;
    404   }
    405 
    406   return kind;
    407 }
    408 
    409 /// LexExclaim:
    410 ///    !foo
    411 ///    !
    412 lltok::Kind LLLexer::LexExclaim() {
    413   // Lex a metadata name as a MetadataVar.
    414   if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
    415       CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
    416     ++CurPtr;
    417     while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
    418            CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
    419       ++CurPtr;
    420 
    421     StrVal.assign(TokStart+1, CurPtr);   // Skip !
    422     UnEscapeLexed(StrVal);
    423     return lltok::MetadataVar;
    424   }
    425   return lltok::exclaim;
    426 }
    427 
/// LexIdentifier: Handle several related productions:
///    Label           [-a-zA-Z$._0-9]+:
///    IntegerType     i[0-9]+
///    Keyword         sdiv, float, ...
///    HexIntConstant  [us]0x[0-9A-Fa-f]+
///
/// Called after the first character of the token (a letter or '_') has been
/// consumed, so CurPtr[-1] is that character and TokStart points at it.
/// Depending on the production, the result is left in StrVal (labels), TyVal
/// (types), UIntVal (instruction opcodes) or APSIntVal (hex integers).
lltok::Kind LLLexer::LexIdentifier() {
  const char *StartChar = CurPtr;
  // IntEnd stays null only while the token still looks like i[0-9]*; for any
  // other first letter it is pinned to StartChar, meaning "not an integer
  // type".
  const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
  // First position holding a character that cannot be part of a keyword.
  const char *KeywordEnd = 0;

  for (; isLabelChar(*CurPtr); ++CurPtr) {
    // If we decide this is an integer, remember the end of the sequence.
    if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
    if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
  }

  // If we stopped due to a colon, this really is a label.
  if (*CurPtr == ':') {
    // The label text includes the first character and excludes the colon,
    // which is consumed.
    StrVal.assign(StartChar-1, CurPtr++);
    return lltok::LabelStr;
  }

  // Otherwise, this wasn't a label.  If this was valid as an integer type,
  // return it.
  if (IntEnd == 0) IntEnd = CurPtr;
  if (IntEnd != StartChar) {
    // Back up to the end of the digit run; anything after it is lexed as a
    // separate token.
    CurPtr = IntEnd;
    uint64_t NumBits = atoull(StartChar, CurPtr);
    if (NumBits < IntegerType::MIN_INT_BITS ||
        NumBits > IntegerType::MAX_INT_BITS) {
      Error("bitwidth for integer type out of range!");
      return lltok::Error;
    }
    TyVal = IntegerType::get(Context, NumBits);
    return lltok::Type;
  }

  // Otherwise, this was a letter sequence.  See which keyword this is.
  if (KeywordEnd == 0) KeywordEnd = CurPtr;
  CurPtr = KeywordEnd;
  // Include the first character, which was consumed before this function ran.
  --StartChar;
  unsigned Len = CurPtr-StartChar;
  // KEYWORD(STR): if the token text is exactly STR, return lltok::kw_STR.
#define KEYWORD(STR) \
  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
    return lltok::kw_##STR;

  KEYWORD(true);    KEYWORD(false);
  KEYWORD(declare); KEYWORD(define);
  KEYWORD(global);  KEYWORD(constant);

  KEYWORD(private);
  KEYWORD(linker_private);
  KEYWORD(linker_private_weak);
  KEYWORD(linker_private_weak_def_auto);
  KEYWORD(internal);
  KEYWORD(available_externally);
  KEYWORD(linkonce);
  KEYWORD(linkonce_odr);
  KEYWORD(weak);
  KEYWORD(weak_odr);
  KEYWORD(appending);
  KEYWORD(dllimport);
  KEYWORD(dllexport);
  KEYWORD(common);
  KEYWORD(default);
  KEYWORD(hidden);
  KEYWORD(protected);
  KEYWORD(unnamed_addr);
  KEYWORD(extern_weak);
  KEYWORD(external);
  KEYWORD(thread_local);
  KEYWORD(zeroinitializer);
  KEYWORD(undef);
  KEYWORD(null);
  KEYWORD(to);
  KEYWORD(tail);
  KEYWORD(target);
  KEYWORD(triple);
  KEYWORD(deplibs);
  KEYWORD(datalayout);
  KEYWORD(volatile);
  KEYWORD(atomic);
  KEYWORD(unordered);
  KEYWORD(monotonic);
  KEYWORD(acquire);
  KEYWORD(release);
  KEYWORD(acq_rel);
  KEYWORD(seq_cst);
  KEYWORD(singlethread);

  KEYWORD(nuw);
  KEYWORD(nsw);
  KEYWORD(exact);
  KEYWORD(inbounds);
  KEYWORD(align);
  KEYWORD(addrspace);
  KEYWORD(section);
  KEYWORD(alias);
  KEYWORD(module);
  KEYWORD(asm);
  KEYWORD(sideeffect);
  KEYWORD(alignstack);
  KEYWORD(gc);

  KEYWORD(ccc);
  KEYWORD(fastcc);
  KEYWORD(coldcc);
  KEYWORD(x86_stdcallcc);
  KEYWORD(x86_fastcallcc);
  KEYWORD(x86_thiscallcc);
  KEYWORD(arm_apcscc);
  KEYWORD(arm_aapcscc);
  KEYWORD(arm_aapcs_vfpcc);
  KEYWORD(msp430_intrcc);
  KEYWORD(ptx_kernel);
  KEYWORD(ptx_device);

  KEYWORD(cc);
  KEYWORD(c);

  KEYWORD(signext);
  KEYWORD(zeroext);
  KEYWORD(inreg);
  KEYWORD(sret);
  KEYWORD(nounwind);
  KEYWORD(noreturn);
  KEYWORD(noalias);
  KEYWORD(nocapture);
  KEYWORD(byval);
  KEYWORD(nest);
  KEYWORD(readnone);
  KEYWORD(readonly);
  KEYWORD(uwtable);
  KEYWORD(returns_twice);

  KEYWORD(inlinehint);
  KEYWORD(noinline);
  KEYWORD(alwaysinline);
  KEYWORD(optsize);
  KEYWORD(ssp);
  KEYWORD(sspreq);
  KEYWORD(noredzone);
  KEYWORD(noimplicitfloat);
  KEYWORD(naked);
  KEYWORD(nonlazybind);

  KEYWORD(type);
  KEYWORD(opaque);

  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);

  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
  KEYWORD(umin);

  KEYWORD(x);
  KEYWORD(blockaddress);

  KEYWORD(personality);
  KEYWORD(cleanup);
  KEYWORD(catch);
  KEYWORD(filter);
#undef KEYWORD

  // Keywords for types.
  // TYPEKEYWORD(STR, LLVMTY): if the token text is exactly STR, store LLVMTY
  // in TyVal and return lltok::Type.
#define TYPEKEYWORD(STR, LLVMTY) \
  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
    TyVal = LLVMTY; return lltok::Type; }
  TYPEKEYWORD("void",      Type::getVoidTy(Context));
  TYPEKEYWORD("float",     Type::getFloatTy(Context));
  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
  TYPEKEYWORD("label",     Type::getLabelTy(Context));
  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
#undef TYPEKEYWORD

  // Keywords for instructions.
  // INSTKEYWORD(STR, Enum): if the token text is exactly STR, store
  // Instruction::Enum in UIntVal and return lltok::kw_STR.
#define INSTKEYWORD(STR, Enum) \
  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
    UIntVal = Instruction::Enum; return lltok::kw_##STR; }

  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);

  INSTKEYWORD(phi,         PHI);
  INSTKEYWORD(call,        Call);
  INSTKEYWORD(trunc,       Trunc);
  INSTKEYWORD(zext,        ZExt);
  INSTKEYWORD(sext,        SExt);
  INSTKEYWORD(fptrunc,     FPTrunc);
  INSTKEYWORD(fpext,       FPExt);
  INSTKEYWORD(uitofp,      UIToFP);
  INSTKEYWORD(sitofp,      SIToFP);
  INSTKEYWORD(fptoui,      FPToUI);
  INSTKEYWORD(fptosi,      FPToSI);
  INSTKEYWORD(inttoptr,    IntToPtr);
  INSTKEYWORD(ptrtoint,    PtrToInt);
  INSTKEYWORD(bitcast,     BitCast);
  INSTKEYWORD(select,      Select);
  INSTKEYWORD(va_arg,      VAArg);
  INSTKEYWORD(ret,         Ret);
  INSTKEYWORD(br,          Br);
  INSTKEYWORD(switch,      Switch);
  INSTKEYWORD(indirectbr,  IndirectBr);
  INSTKEYWORD(invoke,      Invoke);
  INSTKEYWORD(resume,      Resume);
  INSTKEYWORD(unwind,      Unwind);
  INSTKEYWORD(unreachable, Unreachable);

  INSTKEYWORD(alloca,      Alloca);
  INSTKEYWORD(load,        Load);
  INSTKEYWORD(store,       Store);
  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
  INSTKEYWORD(atomicrmw,   AtomicRMW);
  INSTKEYWORD(fence,       Fence);
  INSTKEYWORD(getelementptr, GetElementPtr);

  INSTKEYWORD(extractelement, ExtractElement);
  INSTKEYWORD(insertelement,  InsertElement);
  INSTKEYWORD(shufflevector,  ShuffleVector);
  INSTKEYWORD(extractvalue,   ExtractValue);
  INSTKEYWORD(insertvalue,    InsertValue);
  INSTKEYWORD(landingpad,     LandingPad);
#undef INSTKEYWORD

  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
  // the CFE to avoid forcing it to deal with 64-bit numbers.
  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
      TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
    // Number of characters after the three-character "u0x"/"s0x" prefix;
    // each hex digit carries four bits.
    int len = CurPtr-TokStart-3;
    uint32_t bits = len * 4;
    APInt Tmp(bits, StringRef(TokStart+3, len), 16);
    // Shrink to the bits actually in use, unless the value is zero.
    uint32_t activeBits = Tmp.getActiveBits();
    if (activeBits > 0 && activeBits < bits)
      Tmp = Tmp.trunc(activeBits);
    // A 'u' prefix marks the value as unsigned, 's' as signed.
    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
    return lltok::APSInt;
  }

  // If this is "cc1234", return this as just "cc".
  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
    CurPtr = TokStart+2;
    return lltok::kw_cc;
  }

  // Finally, if this isn't known, return an error.
  // Only the first character is consumed, so lexing resumes right after it.
  CurPtr = TokStart+1;
  return lltok::Error;
}
    688 
    689 
    690 /// Lex0x: Handle productions that start with 0x, knowing that it matches and
    691 /// that this is not a label:
    692 ///    HexFPConstant     0x[0-9A-Fa-f]+
    693 ///    HexFP80Constant   0xK[0-9A-Fa-f]+
    694 ///    HexFP128Constant  0xL[0-9A-Fa-f]+
    695 ///    HexPPC128Constant 0xM[0-9A-Fa-f]+
    696 lltok::Kind LLLexer::Lex0x() {
    697   CurPtr = TokStart + 2;
    698 
    699   char Kind;
    700   if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
    701     Kind = *CurPtr++;
    702   } else {
    703     Kind = 'J';
    704   }
    705 
    706   if (!isxdigit(CurPtr[0])) {
    707     // Bad token, return it as an error.
    708     CurPtr = TokStart+1;
    709     return lltok::Error;
    710   }
    711 
    712   while (isxdigit(CurPtr[0]))
    713     ++CurPtr;
    714 
    715   if (Kind == 'J') {
    716     // HexFPConstant - Floating point constant represented in IEEE format as a
    717     // hexadecimal number for when exponential notation is not precise enough.
    718     // Float and double only.
    719     APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
    720     return lltok::APFloat;
    721   }
    722 
    723   uint64_t Pair[2];
    724   switch (Kind) {
    725   default: llvm_unreachable("Unknown kind!");
    726   case 'K':
    727     // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
    728     FP80HexToIntPair(TokStart+3, CurPtr, Pair);
    729     APFloatVal = APFloat(APInt(80, Pair));
    730     return lltok::APFloat;
    731   case 'L':
    732     // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
    733     HexToIntPair(TokStart+3, CurPtr, Pair);
    734     APFloatVal = APFloat(APInt(128, Pair), true);
    735     return lltok::APFloat;
    736   case 'M':
    737     // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
    738     HexToIntPair(TokStart+3, CurPtr, Pair);
    739     APFloatVal = APFloat(APInt(128, Pair));
    740     return lltok::APFloat;
    741   }
    742 }
    743 
    744 /// LexIdentifier: Handle several related productions:
    745 ///    Label             [-a-zA-Z$._0-9]+:
    746 ///    NInteger          -[0-9]+
    747 ///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
    748 ///    PInteger          [0-9]+
    749 ///    HexFPConstant     0x[0-9A-Fa-f]+
    750 ///    HexFP80Constant   0xK[0-9A-Fa-f]+
    751 ///    HexFP128Constant  0xL[0-9A-Fa-f]+
    752 ///    HexPPC128Constant 0xM[0-9A-Fa-f]+
    753 lltok::Kind LLLexer::LexDigitOrNegative() {
    754   // If the letter after the negative is a number, this is probably a label.
    755   if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
    756     // Okay, this is not a number after the -, it's probably a label.
    757     if (const char *End = isLabelTail(CurPtr)) {
    758       StrVal.assign(TokStart, End-1);
    759       CurPtr = End;
    760       return lltok::LabelStr;
    761     }
    762 
    763     return lltok::Error;
    764   }
    765 
    766   // At this point, it is either a label, int or fp constant.
    767 
    768   // Skip digits, we have at least one.
    769   for (; isdigit(CurPtr[0]); ++CurPtr)
    770     /*empty*/;
    771 
    772   // Check to see if this really is a label afterall, e.g. "-1:".
    773   if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
    774     if (const char *End = isLabelTail(CurPtr)) {
    775       StrVal.assign(TokStart, End-1);
    776       CurPtr = End;
    777       return lltok::LabelStr;
    778     }
    779   }
    780 
    781   // If the next character is a '.', then it is a fp value, otherwise its
    782   // integer.
    783   if (CurPtr[0] != '.') {
    784     if (TokStart[0] == '0' && TokStart[1] == 'x')
    785       return Lex0x();
    786     unsigned Len = CurPtr-TokStart;
    787     uint32_t numBits = ((Len * 64) / 19) + 2;
    788     APInt Tmp(numBits, StringRef(TokStart, Len), 10);
    789     if (TokStart[0] == '-') {
    790       uint32_t minBits = Tmp.getMinSignedBits();
    791       if (minBits > 0 && minBits < numBits)
    792         Tmp = Tmp.trunc(minBits);
    793       APSIntVal = APSInt(Tmp, false);
    794     } else {
    795       uint32_t activeBits = Tmp.getActiveBits();
    796       if (activeBits > 0 && activeBits < numBits)
    797         Tmp = Tmp.trunc(activeBits);
    798       APSIntVal = APSInt(Tmp, true);
    799     }
    800     return lltok::APSInt;
    801   }
    802 
    803   ++CurPtr;
    804 
    805   // Skip over [0-9]*([eE][-+]?[0-9]+)?
    806   while (isdigit(CurPtr[0])) ++CurPtr;
    807 
    808   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
    809     if (isdigit(CurPtr[1]) ||
    810         ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
    811       CurPtr += 2;
    812       while (isdigit(CurPtr[0])) ++CurPtr;
    813     }
    814   }
    815 
    816   APFloatVal = APFloat(std::atof(TokStart));
    817   return lltok::APFloat;
    818 }
    819 
    820 ///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
    821 lltok::Kind LLLexer::LexPositive() {
    822   // If the letter after the negative is a number, this is probably not a
    823   // label.
    824   if (!isdigit(CurPtr[0]))
    825     return lltok::Error;
    826 
    827   // Skip digits.
    828   for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
    829     /*empty*/;
    830 
    831   // At this point, we need a '.'.
    832   if (CurPtr[0] != '.') {
    833     CurPtr = TokStart+1;
    834     return lltok::Error;
    835   }
    836 
    837   ++CurPtr;
    838 
    839   // Skip over [0-9]*([eE][-+]?[0-9]+)?
    840   while (isdigit(CurPtr[0])) ++CurPtr;
    841 
    842   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
    843     if (isdigit(CurPtr[1]) ||
    844         ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
    845       CurPtr += 2;
    846       while (isdigit(CurPtr[0])) ++CurPtr;
    847     }
    848   }
    849 
    850   APFloatVal = APFloat(std::atof(TokStart));
    851   return lltok::APFloat;
    852 }
    853