1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringSwitch.h" 17 #include "llvm/ADT/Twine.h" 18 #include <cctype> 19 20 using namespace llvm; 21 22 namespace { 23 24 /// This class provides a way to iterate and get characters from the source 25 /// string. 26 class Cursor { 27 const char *Ptr; 28 const char *End; 29 30 public: 31 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 32 33 explicit Cursor(StringRef Str) { 34 Ptr = Str.data(); 35 End = Ptr + Str.size(); 36 } 37 38 bool isEOF() const { return Ptr == End; } 39 40 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 41 42 void advance(unsigned I = 1) { Ptr += I; } 43 44 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 45 46 StringRef upto(Cursor C) const { 47 assert(C.Ptr >= Ptr && C.Ptr <= End); 48 return StringRef(Ptr, C.Ptr - Ptr); 49 } 50 51 StringRef::iterator location() const { return Ptr; } 52 53 operator bool() const { return Ptr != nullptr; } 54 }; 55 56 } // end anonymous namespace 57 58 MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { 59 this->Kind = Kind; 60 this->Range = Range; 61 return *this; 62 } 63 64 MIToken &MIToken::setStringValue(StringRef StrVal) { 65 StringValue = StrVal; 66 return *this; 67 } 68 69 MIToken &MIToken::setOwnedStringValue(std::string StrVal) { 70 StringValueStorage = std::move(StrVal); 71 StringValue = StringValueStorage; 72 return *this; 73 } 74 75 MIToken &MIToken::setIntegerValue(APSInt IntVal) { 76 this->IntVal = std::move(IntVal); 77 return *this; 78 } 79 80 /// Skip the leading whitespace characters and return the updated cursor. 81 static Cursor skipWhitespace(Cursor C) { 82 while (isblank(C.peek())) 83 C.advance(); 84 return C; 85 } 86 87 static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } 88 89 /// Skip a line comment and return the updated cursor. 90 static Cursor skipComment(Cursor C) { 91 if (C.peek() != ';') 92 return C; 93 while (!isNewlineChar(C.peek()) && !C.isEOF()) 94 C.advance(); 95 return C; 96 } 97 98 /// Return true if the given character satisfies the following regular 99 /// expression: [-a-zA-Z$._0-9] 100 static bool isIdentifierChar(char C) { 101 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 102 C == '$'; 103 } 104 105 /// Unescapes the given string value. 106 /// 107 /// Expects the string value to be quoted. 108 static std::string unescapeQuotedString(StringRef Value) { 109 assert(Value.front() == '"' && Value.back() == '"'); 110 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 111 112 std::string Str; 113 Str.reserve(C.remaining().size()); 114 while (!C.isEOF()) { 115 char Char = C.peek(); 116 if (Char == '\\') { 117 if (C.peek(1) == '\\') { 118 // Two '\' become one 119 Str += '\\'; 120 C.advance(2); 121 continue; 122 } 123 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 124 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 125 C.advance(3); 126 continue; 127 } 128 } 129 Str += Char; 130 C.advance(); 131 } 132 return Str; 133 } 134 135 /// Lex a string constant using the following regular expression: \"[^\"]*\" 136 static Cursor lexStringConstant( 137 Cursor C, 138 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 139 assert(C.peek() == '"'); 140 for (C.advance(); C.peek() != '"'; C.advance()) { 141 if (C.isEOF() || isNewlineChar(C.peek())) { 142 ErrorCallback( 143 C.location(), 144 "end of machine instruction reached before the closing '\"'"); 145 return None; 146 } 147 } 148 C.advance(); 149 return C; 150 } 151 152 static Cursor lexName( 153 Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, 154 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 155 auto Range = C; 156 C.advance(PrefixLength); 157 if (C.peek() == '"') { 158 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 159 StringRef String = Range.upto(R); 160 Token.reset(Type, String) 161 .setOwnedStringValue( 162 unescapeQuotedString(String.drop_front(PrefixLength))); 163 return R; 164 } 165 Token.reset(MIToken::Error, Range.remaining()); 166 return Range; 167 } 168 while (isIdentifierChar(C.peek())) 169 C.advance(); 170 Token.reset(Type, Range.upto(C)) 171 .setStringValue(Range.upto(C).drop_front(PrefixLength)); 172 return C; 173 } 174 175 static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { 176 if (C.peek() != 'i' || !isdigit(C.peek(1))) 177 return None; 178 auto Range = C; 179 C.advance(); // Skip 'i' 180 while (isdigit(C.peek())) 181 C.advance(); 182 Token.reset(MIToken::IntegerType, Range.upto(C)); 183 return C; 184 } 185 186 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 187 return StringSwitch<MIToken::TokenKind>(Identifier) 188 .Case("_", MIToken::underscore) 189 .Case("implicit", MIToken::kw_implicit) 190 .Case("implicit-def", MIToken::kw_implicit_define) 191 .Case("def", MIToken::kw_def) 192 .Case("dead", MIToken::kw_dead) 193 .Case("killed", MIToken::kw_killed) 194 .Case("undef", MIToken::kw_undef) 195 .Case("internal", MIToken::kw_internal) 196 .Case("early-clobber", MIToken::kw_early_clobber) 197 .Case("debug-use", MIToken::kw_debug_use) 198 .Case("tied-def", MIToken::kw_tied_def) 199 .Case("frame-setup", MIToken::kw_frame_setup) 200 .Case("debug-location", MIToken::kw_debug_location) 201 .Case(".cfi_same_value", MIToken::kw_cfi_same_value) 202 .Case(".cfi_offset", MIToken::kw_cfi_offset) 203 .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) 204 .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 205 .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) 206 .Case("blockaddress", MIToken::kw_blockaddress) 207 .Case("target-index", MIToken::kw_target_index) 208 .Case("half", MIToken::kw_half) 209 .Case("float", MIToken::kw_float) 210 .Case("double", MIToken::kw_double) 211 .Case("x86_fp80", MIToken::kw_x86_fp80) 212 .Case("fp128", MIToken::kw_fp128) 213 .Case("ppc_fp128", MIToken::kw_ppc_fp128) 214 .Case("target-flags", MIToken::kw_target_flags) 215 .Case("volatile", MIToken::kw_volatile) 216 .Case("non-temporal", MIToken::kw_non_temporal) 217 .Case("invariant", MIToken::kw_invariant) 218 .Case("align", MIToken::kw_align) 219 .Case("stack", MIToken::kw_stack) 220 .Case("got", MIToken::kw_got) 221 .Case("jump-table", MIToken::kw_jump_table) 222 .Case("constant-pool", MIToken::kw_constant_pool) 223 .Case("call-entry", MIToken::kw_call_entry) 224 .Case("liveout", MIToken::kw_liveout) 225 .Case("address-taken", MIToken::kw_address_taken) 226 .Case("landing-pad", MIToken::kw_landing_pad) 227 .Case("liveins", MIToken::kw_liveins) 228 .Case("successors", MIToken::kw_successors) 229 .Default(MIToken::Identifier); 230 } 231 232 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 233 if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') 234 return None; 235 auto Range = C; 236 while (isIdentifierChar(C.peek())) 237 C.advance(); 238 auto Identifier = Range.upto(C); 239 Token.reset(getIdentifierKind(Identifier), Identifier) 240 .setStringValue(Identifier); 241 return C; 242 } 243 244 static Cursor maybeLexMachineBasicBlock( 245 Cursor C, MIToken &Token, 246 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 247 bool IsReference = C.remaining().startswith("%bb."); 248 if (!IsReference && !C.remaining().startswith("bb.")) 249 return None; 250 auto Range = C; 251 unsigned PrefixLength = IsReference ? 4 : 3; 252 C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 253 if (!isdigit(C.peek())) { 254 Token.reset(MIToken::Error, C.remaining()); 255 ErrorCallback(C.location(), "expected a number after '%bb.'"); 256 return C; 257 } 258 auto NumberRange = C; 259 while (isdigit(C.peek())) 260 C.advance(); 261 StringRef Number = NumberRange.upto(C); 262 unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' 263 if (C.peek() == '.') { 264 C.advance(); // Skip '.' 265 ++StringOffset; 266 while (isIdentifierChar(C.peek())) 267 C.advance(); 268 } 269 Token.reset(IsReference ? MIToken::MachineBasicBlock 270 : MIToken::MachineBasicBlockLabel, 271 Range.upto(C)) 272 .setIntegerValue(APSInt(Number)) 273 .setStringValue(Range.upto(C).drop_front(StringOffset)); 274 return C; 275 } 276 277 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 278 MIToken::TokenKind Kind) { 279 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 280 return None; 281 auto Range = C; 282 C.advance(Rule.size()); 283 auto NumberRange = C; 284 while (isdigit(C.peek())) 285 C.advance(); 286 Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); 287 return C; 288 } 289 290 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 291 MIToken::TokenKind Kind) { 292 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 293 return None; 294 auto Range = C; 295 C.advance(Rule.size()); 296 auto NumberRange = C; 297 while (isdigit(C.peek())) 298 C.advance(); 299 StringRef Number = NumberRange.upto(C); 300 unsigned StringOffset = Rule.size() + Number.size(); 301 if (C.peek() == '.') { 302 C.advance(); 303 ++StringOffset; 304 while (isIdentifierChar(C.peek())) 305 C.advance(); 306 } 307 Token.reset(Kind, Range.upto(C)) 308 .setIntegerValue(APSInt(Number)) 309 .setStringValue(Range.upto(C).drop_front(StringOffset)); 310 return C; 311 } 312 313 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 314 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 315 } 316 317 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 318 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 319 } 320 321 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 322 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 323 } 324 325 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 326 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 327 } 328 329 static Cursor maybeLexIRBlock( 330 Cursor C, MIToken &Token, 331 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 332 const StringRef Rule = "%ir-block."; 333 if (!C.remaining().startswith(Rule)) 334 return None; 335 if (isdigit(C.peek(Rule.size()))) 336 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 337 return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); 338 } 339 340 static Cursor maybeLexIRValue( 341 Cursor C, MIToken &Token, 342 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 343 const StringRef Rule = "%ir."; 344 if (!C.remaining().startswith(Rule)) 345 return None; 346 if (isdigit(C.peek(Rule.size()))) 347 return maybeLexIndex(C, Token, Rule, MIToken::IRValue); 348 return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); 349 } 350 351 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 352 auto Range = C; 353 C.advance(); // Skip '%' 354 auto NumberRange = C; 355 while (isdigit(C.peek())) 356 C.advance(); 357 Token.reset(MIToken::VirtualRegister, Range.upto(C)) 358 .setIntegerValue(APSInt(NumberRange.upto(C))); 359 return C; 360 } 361 362 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 363 if (C.peek() != '%') 364 return None; 365 if (isdigit(C.peek(1))) 366 return lexVirtualRegister(C, Token); 367 auto Range = C; 368 C.advance(); // Skip '%' 369 while (isIdentifierChar(C.peek())) 370 C.advance(); 371 Token.reset(MIToken::NamedRegister, Range.upto(C)) 372 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' 373 return C; 374 } 375 376 static Cursor maybeLexGlobalValue( 377 Cursor C, MIToken &Token, 378 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 379 if (C.peek() != '@') 380 return None; 381 if (!isdigit(C.peek(1))) 382 return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, 383 ErrorCallback); 384 auto Range = C; 385 C.advance(1); // Skip the '@' 386 auto NumberRange = C; 387 while (isdigit(C.peek())) 388 C.advance(); 389 Token.reset(MIToken::GlobalValue, Range.upto(C)) 390 .setIntegerValue(APSInt(NumberRange.upto(C))); 391 return C; 392 } 393 394 static Cursor maybeLexExternalSymbol( 395 Cursor C, MIToken &Token, 396 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 397 if (C.peek() != '$') 398 return None; 399 return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, 400 ErrorCallback); 401 } 402 403 static bool isValidHexFloatingPointPrefix(char C) { 404 return C == 'H' || C == 'K' || C == 'L' || C == 'M'; 405 } 406 407 static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { 408 if (C.peek() != '0' || C.peek(1) != 'x') 409 return None; 410 Cursor Range = C; 411 C.advance(2); // Skip '0x' 412 if (isValidHexFloatingPointPrefix(C.peek())) 413 C.advance(); 414 while (isxdigit(C.peek())) 415 C.advance(); 416 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 417 return C; 418 } 419 420 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { 421 C.advance(); 422 // Skip over [0-9]*([eE][-+]?[0-9]+)? 423 while (isdigit(C.peek())) 424 C.advance(); 425 if ((C.peek() == 'e' || C.peek() == 'E') && 426 (isdigit(C.peek(1)) || 427 ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { 428 C.advance(2); 429 while (isdigit(C.peek())) 430 C.advance(); 431 } 432 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 433 return C; 434 } 435 436 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { 437 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 438 return None; 439 auto Range = C; 440 C.advance(); 441 while (isdigit(C.peek())) 442 C.advance(); 443 if (C.peek() == '.') 444 return lexFloatingPointLiteral(Range, C, Token); 445 StringRef StrVal = Range.upto(C); 446 Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); 447 return C; 448 } 449 450 static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { 451 return StringSwitch<MIToken::TokenKind>(Identifier) 452 .Case("!tbaa", MIToken::md_tbaa) 453 .Case("!alias.scope", MIToken::md_alias_scope) 454 .Case("!noalias", MIToken::md_noalias) 455 .Case("!range", MIToken::md_range) 456 .Default(MIToken::Error); 457 } 458 459 static Cursor maybeLexExlaim( 460 Cursor C, MIToken &Token, 461 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 462 if (C.peek() != '!') 463 return None; 464 auto Range = C; 465 C.advance(1); 466 if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { 467 Token.reset(MIToken::exclaim, Range.upto(C)); 468 return C; 469 } 470 while (isIdentifierChar(C.peek())) 471 C.advance(); 472 StringRef StrVal = Range.upto(C); 473 Token.reset(getMetadataKeywordKind(StrVal), StrVal); 474 if (Token.isError()) 475 ErrorCallback(Token.location(), 476 "use of unknown metadata keyword '" + StrVal + "'"); 477 return C; 478 } 479 480 static MIToken::TokenKind symbolToken(char C) { 481 switch (C) { 482 case ',': 483 return MIToken::comma; 484 case '=': 485 return MIToken::equal; 486 case ':': 487 return MIToken::colon; 488 case '(': 489 return MIToken::lparen; 490 case ')': 491 return MIToken::rparen; 492 case '{': 493 return MIToken::lbrace; 494 case '}': 495 return MIToken::rbrace; 496 case '+': 497 return MIToken::plus; 498 case '-': 499 return MIToken::minus; 500 default: 501 return MIToken::Error; 502 } 503 } 504 505 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 506 MIToken::TokenKind Kind; 507 unsigned Length = 1; 508 if (C.peek() == ':' && C.peek(1) == ':') { 509 Kind = MIToken::coloncolon; 510 Length = 2; 511 } else 512 Kind = symbolToken(C.peek()); 513 if (Kind == MIToken::Error) 514 return None; 515 auto Range = C; 516 C.advance(Length); 517 Token.reset(Kind, Range.upto(C)); 518 return C; 519 } 520 521 static Cursor maybeLexNewline(Cursor C, MIToken &Token) { 522 if (!isNewlineChar(C.peek())) 523 return None; 524 auto Range = C; 525 C.advance(); 526 Token.reset(MIToken::Newline, Range.upto(C)); 527 return C; 528 } 529 530 static Cursor maybeLexEscapedIRValue( 531 Cursor C, MIToken &Token, 532 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 533 if (C.peek() != '`') 534 return None; 535 auto Range = C; 536 C.advance(); 537 auto StrRange = C; 538 while (C.peek() != '`') { 539 if (C.isEOF() || isNewlineChar(C.peek())) { 540 ErrorCallback( 541 C.location(), 542 "end of machine instruction reached before the closing '`'"); 543 Token.reset(MIToken::Error, Range.remaining()); 544 return C; 545 } 546 C.advance(); 547 } 548 StringRef Value = StrRange.upto(C); 549 C.advance(); 550 Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); 551 return C; 552 } 553 554 StringRef llvm::lexMIToken( 555 StringRef Source, MIToken &Token, 556 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 557 auto C = skipComment(skipWhitespace(Cursor(Source))); 558 if (C.isEOF()) { 559 Token.reset(MIToken::Eof, C.remaining()); 560 return C.remaining(); 561 } 562 563 if (Cursor R = maybeLexIntegerType(C, Token)) 564 return R.remaining(); 565 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 566 return R.remaining(); 567 if (Cursor R = maybeLexIdentifier(C, Token)) 568 return R.remaining(); 569 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 570 return R.remaining(); 571 if (Cursor R = maybeLexStackObject(C, Token)) 572 return R.remaining(); 573 if (Cursor R = maybeLexFixedStackObject(C, Token)) 574 return R.remaining(); 575 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 576 return R.remaining(); 577 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 578 return R.remaining(); 579 if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) 580 return R.remaining(); 581 if (Cursor R = maybeLexRegister(C, Token)) 582 return R.remaining(); 583 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 584 return R.remaining(); 585 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 586 return R.remaining(); 587 if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) 588 return R.remaining(); 589 if (Cursor R = maybeLexNumericalLiteral(C, Token)) 590 return R.remaining(); 591 if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) 592 return R.remaining(); 593 if (Cursor R = maybeLexSymbol(C, Token)) 594 return R.remaining(); 595 if (Cursor R = maybeLexNewline(C, Token)) 596 return R.remaining(); 597 if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) 598 return R.remaining(); 599 600 Token.reset(MIToken::Error, C.remaining()); 601 ErrorCallback(C.location(), 602 Twine("unexpected character '") + Twine(C.peek()) + "'"); 603 return C.remaining(); 604 } 605