1 //===------------ ARMDecoderEmitter.cpp - Decoder Generator ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the ARM Disassembler. 11 // It contains the tablegen backend that emits the decoder functions for ARM and 12 // Thumb. The disassembler core includes the auto-generated file, invokes the 13 // decoder functions, and builds up the MCInst based on the decoded Opcode. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #define DEBUG_TYPE "arm-decoder-emitter" 18 19 #include "ARMDecoderEmitter.h" 20 #include "CodeGenTarget.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/TableGen/Record.h" 25 26 #include <vector> 27 #include <map> 28 #include <string> 29 30 using namespace llvm; 31 32 ///////////////////////////////////////////////////// 33 // // 34 // Enums and Utilities for ARM Instruction Format // 35 // // 36 ///////////////////////////////////////////////////// 37 38 #define ARM_FORMATS \ 39 ENTRY(ARM_FORMAT_PSEUDO, 0) \ 40 ENTRY(ARM_FORMAT_MULFRM, 1) \ 41 ENTRY(ARM_FORMAT_BRFRM, 2) \ 42 ENTRY(ARM_FORMAT_BRMISCFRM, 3) \ 43 ENTRY(ARM_FORMAT_DPFRM, 4) \ 44 ENTRY(ARM_FORMAT_DPSOREGREGFRM, 5) \ 45 ENTRY(ARM_FORMAT_LDFRM, 6) \ 46 ENTRY(ARM_FORMAT_STFRM, 7) \ 47 ENTRY(ARM_FORMAT_LDMISCFRM, 8) \ 48 ENTRY(ARM_FORMAT_STMISCFRM, 9) \ 49 ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \ 50 ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \ 51 ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \ 52 ENTRY(ARM_FORMAT_SATFRM, 13) \ 53 ENTRY(ARM_FORMAT_EXTFRM, 14) \ 54 ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \ 55 ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \ 56 ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \ 57 ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \ 58 ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \ 59 ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \ 60 ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \ 61 ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \ 62 ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \ 63 ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \ 64 ENTRY(ARM_FORMAT_THUMBFRM, 25) \ 65 ENTRY(ARM_FORMAT_MISCFRM, 26) \ 66 ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \ 67 ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \ 68 ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \ 69 ENTRY(ARM_FORMAT_NLdSt, 30) \ 70 ENTRY(ARM_FORMAT_N1RegModImm, 31) \ 71 ENTRY(ARM_FORMAT_N2Reg, 32) \ 72 ENTRY(ARM_FORMAT_NVCVT, 33) \ 73 ENTRY(ARM_FORMAT_NVecDupLn, 34) \ 74 ENTRY(ARM_FORMAT_N2RegVecShL, 35) \ 75 ENTRY(ARM_FORMAT_N2RegVecShR, 36) \ 76 ENTRY(ARM_FORMAT_N3Reg, 37) \ 77 ENTRY(ARM_FORMAT_N3RegVecSh, 38) \ 78 ENTRY(ARM_FORMAT_NVecExtract, 39) \ 79 ENTRY(ARM_FORMAT_NVecMulScalar, 40) \ 80 ENTRY(ARM_FORMAT_NVTBL, 41) \ 81 ENTRY(ARM_FORMAT_DPSOREGIMMFRM, 42) 82 83 // ARM instruction format specifies the encoding used by the instruction. 84 #define ENTRY(n, v) n = v, 85 typedef enum { 86 ARM_FORMATS 87 ARM_FORMAT_NA 88 } ARMFormat; 89 #undef ENTRY 90 91 // Converts enum to const char*. 92 static const char *stringForARMFormat(ARMFormat form) { 93 #define ENTRY(n, v) case n: return #n; 94 switch(form) { 95 ARM_FORMATS 96 case ARM_FORMAT_NA: 97 default: 98 return ""; 99 } 100 #undef ENTRY 101 } 102 103 enum { 104 IndexModeNone = 0, 105 IndexModePre = 1, 106 IndexModePost = 2, 107 IndexModeUpd = 3 108 }; 109 110 ///////////////////////// 111 // // 112 // Utility functions // 113 // // 114 ///////////////////////// 115 116 /// byteFromBitsInit - Return the byte value from a BitsInit. 117 /// Called from getByteField(). 118 static uint8_t byteFromBitsInit(BitsInit &init) { 119 int width = init.getNumBits(); 120 121 assert(width <= 8 && "Field is too large for uint8_t!"); 122 123 int index; 124 uint8_t mask = 0x01; 125 126 uint8_t ret = 0; 127 128 for (index = 0; index < width; index++) { 129 if (static_cast<BitInit*>(init.getBit(index))->getValue()) 130 ret |= mask; 131 132 mask <<= 1; 133 } 134 135 return ret; 136 } 137 138 static uint8_t getByteField(const Record &def, const char *str) { 139 BitsInit *bits = def.getValueAsBitsInit(str); 140 return byteFromBitsInit(*bits); 141 } 142 143 static BitsInit &getBitsField(const Record &def, const char *str) { 144 BitsInit *bits = def.getValueAsBitsInit(str); 145 return *bits; 146 } 147 148 /// sameStringExceptSuffix - Return true if the two strings differ only in RHS's 149 /// suffix. ("VST4d8", "VST4d8_UPD", "_UPD") as input returns true. 150 static 151 bool sameStringExceptSuffix(const StringRef LHS, const StringRef RHS, 152 const StringRef Suffix) { 153 154 if (RHS.startswith(LHS) && RHS.endswith(Suffix)) 155 return RHS.size() == LHS.size() + Suffix.size(); 156 157 return false; 158 } 159 160 /// thumbInstruction - Determine whether we have a Thumb instruction. 161 /// See also ARMInstrFormats.td. 162 static bool thumbInstruction(uint8_t Form) { 163 return Form == ARM_FORMAT_THUMBFRM; 164 } 165 166 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 167 // for a bit value. 168 // 169 // BIT_UNFILTERED is used as the init value for a filter position. It is used 170 // only for filter processings. 171 typedef enum { 172 BIT_TRUE, // '1' 173 BIT_FALSE, // '0' 174 BIT_UNSET, // '?' 175 BIT_UNFILTERED // unfiltered 176 } bit_value_t; 177 178 static bool ValueSet(bit_value_t V) { 179 return (V == BIT_TRUE || V == BIT_FALSE); 180 } 181 static bool ValueNotSet(bit_value_t V) { 182 return (V == BIT_UNSET); 183 } 184 static int Value(bit_value_t V) { 185 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); 186 } 187 static bit_value_t bitFromBits(BitsInit &bits, unsigned index) { 188 if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index))) 189 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 190 191 // The bit is uninitialized. 192 return BIT_UNSET; 193 } 194 // Prints the bit value for each position. 195 static void dumpBits(raw_ostream &o, BitsInit &bits) { 196 unsigned index; 197 198 for (index = bits.getNumBits(); index > 0; index--) { 199 switch (bitFromBits(bits, index - 1)) { 200 case BIT_TRUE: 201 o << "1"; 202 break; 203 case BIT_FALSE: 204 o << "0"; 205 break; 206 case BIT_UNSET: 207 o << "_"; 208 break; 209 default: 210 assert(0 && "unexpected return value from bitFromBits"); 211 } 212 } 213 } 214 215 // Enums for the available target names. 216 typedef enum { 217 TARGET_ARM = 0, 218 TARGET_THUMB 219 } TARGET_NAME_t; 220 221 // FIXME: Possibly auto-detected? 222 #define BIT_WIDTH 32 223 224 // Forward declaration. 225 class ARMFilterChooser; 226 227 // Representation of the instruction to work on. 228 typedef bit_value_t insn_t[BIT_WIDTH]; 229 230 /// Filter - Filter works with FilterChooser to produce the decoding tree for 231 /// the ISA. 232 /// 233 /// It is useful to think of a Filter as governing the switch stmts of the 234 /// decoding tree in a certain level. Each case stmt delegates to an inferior 235 /// FilterChooser to decide what further decoding logic to employ, or in another 236 /// words, what other remaining bits to look at. The FilterChooser eventually 237 /// chooses a best Filter to do its job. 238 /// 239 /// This recursive scheme ends when the number of Opcodes assigned to the 240 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when 241 /// the Filter/FilterChooser combo does not know how to distinguish among the 242 /// Opcodes assigned. 243 /// 244 /// An example of a conflict is 245 /// 246 /// Conflict: 247 /// 111101000.00........00010000.... 248 /// 111101000.00........0001........ 249 /// 1111010...00........0001........ 250 /// 1111010...00.................... 251 /// 1111010......................... 252 /// 1111............................ 253 /// ................................ 254 /// VST4q8a 111101000_00________00010000____ 255 /// VST4q8b 111101000_00________00010000____ 256 /// 257 /// The Debug output shows the path that the decoding tree follows to reach the 258 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced 259 /// even registers, while VST4q8b is a vst4 to double-spaced odd regsisters. 260 /// 261 /// The encoding info in the .td files does not specify this meta information, 262 /// which could have been used by the decoder to resolve the conflict. The 263 /// decoder could try to decode the even/odd register numbering and assign to 264 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" 265 /// version and return the Opcode since the two have the same Asm format string. 266 class ARMFilter { 267 protected: 268 ARMFilterChooser *Owner; // points to the FilterChooser who owns this filter 269 unsigned StartBit; // the starting bit position 270 unsigned NumBits; // number of bits to filter 271 bool Mixed; // a mixed region contains both set and unset bits 272 273 // Map of well-known segment value to the set of uid's with that value. 274 std::map<uint64_t, std::vector<unsigned> > FilteredInstructions; 275 276 // Set of uid's with non-constant segment values. 277 std::vector<unsigned> VariableInstructions; 278 279 // Map of well-known segment value to its delegate. 280 std::map<unsigned, ARMFilterChooser*> FilterChooserMap; 281 282 // Number of instructions which fall under FilteredInstructions category. 283 unsigned NumFiltered; 284 285 // Keeps track of the last opcode in the filtered bucket. 286 unsigned LastOpcFiltered; 287 288 // Number of instructions which fall under VariableInstructions category. 289 unsigned NumVariable; 290 291 public: 292 unsigned getNumFiltered() { return NumFiltered; } 293 unsigned getNumVariable() { return NumVariable; } 294 unsigned getSingletonOpc() { 295 assert(NumFiltered == 1); 296 return LastOpcFiltered; 297 } 298 // Return the filter chooser for the group of instructions without constant 299 // segment values. 300 ARMFilterChooser &getVariableFC() { 301 assert(NumFiltered == 1); 302 assert(FilterChooserMap.size() == 1); 303 return *(FilterChooserMap.find((unsigned)-1)->second); 304 } 305 306 ARMFilter(const ARMFilter &f); 307 ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits, 308 bool mixed); 309 310 ~ARMFilter(); 311 312 // Divides the decoding task into sub tasks and delegates them to the 313 // inferior FilterChooser's. 314 // 315 // A special case arises when there's only one entry in the filtered 316 // instructions. In order to unambiguously decode the singleton, we need to 317 // match the remaining undecoded encoding bits against the singleton. 318 void recurse(); 319 320 // Emit code to decode instructions given a segment or segments of bits. 321 void emit(raw_ostream &o, unsigned &Indentation); 322 323 // Returns the number of fanout produced by the filter. More fanout implies 324 // the filter distinguishes more categories of instructions. 325 unsigned usefulness() const; 326 }; // End of class Filter 327 328 // These are states of our finite state machines used in FilterChooser's 329 // filterProcessor() which produces the filter candidates to use. 330 typedef enum { 331 ATTR_NONE, 332 ATTR_FILTERED, 333 ATTR_ALL_SET, 334 ATTR_ALL_UNSET, 335 ATTR_MIXED 336 } bitAttr_t; 337 338 /// ARMFilterChooser - FilterChooser chooses the best filter among a set of Filters 339 /// in order to perform the decoding of instructions at the current level. 340 /// 341 /// Decoding proceeds from the top down. Based on the well-known encoding bits 342 /// of instructions available, FilterChooser builds up the possible Filters that 343 /// can further the task of decoding by distinguishing among the remaining 344 /// candidate instructions. 345 /// 346 /// Once a filter has been chosen, it is called upon to divide the decoding task 347 /// into sub-tasks and delegates them to its inferior FilterChoosers for further 348 /// processings. 349 /// 350 /// It is useful to think of a Filter as governing the switch stmts of the 351 /// decoding tree. And each case is delegated to an inferior FilterChooser to 352 /// decide what further remaining bits to look at. 353 class ARMFilterChooser { 354 static TARGET_NAME_t TargetName; 355 356 protected: 357 friend class ARMFilter; 358 359 // Vector of codegen instructions to choose our filter. 360 const std::vector<const CodeGenInstruction*> &AllInstructions; 361 362 // Vector of uid's for this filter chooser to work on. 363 const std::vector<unsigned> Opcodes; 364 365 // Vector of candidate filters. 366 std::vector<ARMFilter> Filters; 367 368 // Array of bit values passed down from our parent. 369 // Set to all BIT_UNFILTERED's for Parent == NULL. 370 bit_value_t FilterBitValues[BIT_WIDTH]; 371 372 // Links to the FilterChooser above us in the decoding tree. 373 ARMFilterChooser *Parent; 374 375 // Index of the best filter from Filters. 376 int BestIndex; 377 378 public: 379 static void setTargetName(TARGET_NAME_t tn) { TargetName = tn; } 380 381 ARMFilterChooser(const ARMFilterChooser &FC) : 382 AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes), 383 Filters(FC.Filters), Parent(FC.Parent), BestIndex(FC.BestIndex) { 384 memcpy(FilterBitValues, FC.FilterBitValues, sizeof(FilterBitValues)); 385 } 386 387 ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts, 388 const std::vector<unsigned> &IDs) : 389 AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(NULL), 390 BestIndex(-1) { 391 for (unsigned i = 0; i < BIT_WIDTH; ++i) 392 FilterBitValues[i] = BIT_UNFILTERED; 393 394 doFilter(); 395 } 396 397 ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts, 398 const std::vector<unsigned> &IDs, 399 bit_value_t (&ParentFilterBitValues)[BIT_WIDTH], 400 ARMFilterChooser &parent) : 401 AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(&parent), 402 BestIndex(-1) { 403 for (unsigned i = 0; i < BIT_WIDTH; ++i) 404 FilterBitValues[i] = ParentFilterBitValues[i]; 405 406 doFilter(); 407 } 408 409 // The top level filter chooser has NULL as its parent. 410 bool isTopLevel() { return Parent == NULL; } 411 412 // This provides an opportunity for target specific code emission. 413 void emitTopHook(raw_ostream &o); 414 415 // Emit the top level typedef and decodeInstruction() function. 416 void emitTop(raw_ostream &o, unsigned &Indentation); 417 418 // This provides an opportunity for target specific code emission after 419 // emitTop(). 420 void emitBot(raw_ostream &o, unsigned &Indentation); 421 422 protected: 423 // Populates the insn given the uid. 424 void insnWithID(insn_t &Insn, unsigned Opcode) const { 425 if (AllInstructions[Opcode]->isPseudo) 426 return; 427 428 BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst"); 429 430 for (unsigned i = 0; i < BIT_WIDTH; ++i) 431 Insn[i] = bitFromBits(Bits, i); 432 433 // Set Inst{21} to 1 (wback) when IndexModeBits == IndexModeUpd. 434 Record *R = AllInstructions[Opcode]->TheDef; 435 if (R->getValue("IndexModeBits") && 436 getByteField(*R, "IndexModeBits") == IndexModeUpd) 437 Insn[21] = BIT_TRUE; 438 } 439 440 // Returns the record name. 441 const std::string &nameWithID(unsigned Opcode) const { 442 return AllInstructions[Opcode]->TheDef->getName(); 443 } 444 445 // Populates the field of the insn given the start position and the number of 446 // consecutive bits to scan for. 447 // 448 // Returns false if there exists any uninitialized bit value in the range. 449 // Returns true, otherwise. 450 bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit, 451 unsigned NumBits) const; 452 453 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 454 /// filter array as a series of chars. 455 void dumpFilterArray(raw_ostream &o, bit_value_t (&filter)[BIT_WIDTH]); 456 457 /// dumpStack - dumpStack traverses the filter chooser chain and calls 458 /// dumpFilterArray on each filter chooser up to the top level one. 459 void dumpStack(raw_ostream &o, const char *prefix); 460 461 ARMFilter &bestFilter() { 462 assert(BestIndex != -1 && "BestIndex not set"); 463 return Filters[BestIndex]; 464 } 465 466 // Called from Filter::recurse() when singleton exists. For debug purpose. 467 void SingletonExists(unsigned Opc); 468 469 bool PositionFiltered(unsigned i) { 470 return ValueSet(FilterBitValues[i]); 471 } 472 473 // Calculates the island(s) needed to decode the instruction. 474 // This returns a lit of undecoded bits of an instructions, for example, 475 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 476 // decoded bits in order to verify that the instruction matches the Opcode. 477 unsigned getIslands(std::vector<unsigned> &StartBits, 478 std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals, 479 insn_t &Insn); 480 481 // The purpose of this function is for the API client to detect possible 482 // Load/Store Coprocessor instructions. If the coprocessor number is of 483 // the instruction is either 10 or 11, the decoder should not report the 484 // instruction as LDC/LDC2/STC/STC2, but should match against Advanced SIMD or 485 // VFP instructions. 486 bool LdStCopEncoding1(unsigned Opc) { 487 const std::string &Name = nameWithID(Opc); 488 if (Name == "LDC_OFFSET" || Name == "LDC_OPTION" || 489 Name == "LDC_POST" || Name == "LDC_PRE" || 490 Name == "LDCL_OFFSET" || Name == "LDCL_OPTION" || 491 Name == "LDCL_POST" || Name == "LDCL_PRE" || 492 Name == "STC_OFFSET" || Name == "STC_OPTION" || 493 Name == "STC_POST" || Name == "STC_PRE" || 494 Name == "STCL_OFFSET" || Name == "STCL_OPTION" || 495 Name == "STCL_POST" || Name == "STCL_PRE") 496 return true; 497 else 498 return false; 499 } 500 501 // Emits code to decode the singleton. Return true if we have matched all the 502 // well-known bits. 503 bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc); 504 505 // Emits code to decode the singleton, and then to decode the rest. 506 void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, 507 ARMFilter &Best); 508 509 // Assign a single filter and run with it. 510 void runSingleFilter(ARMFilterChooser &owner, unsigned startBit, 511 unsigned numBit, bool mixed); 512 513 // reportRegion is a helper function for filterProcessor to mark a region as 514 // eligible for use as a filter region. 515 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, 516 bool AllowMixed); 517 518 // FilterProcessor scans the well-known encoding bits of the instructions and 519 // builds up a list of candidate filters. It chooses the best filter and 520 // recursively descends down the decoding tree. 521 bool filterProcessor(bool AllowMixed, bool Greedy = true); 522 523 // Decides on the best configuration of filter(s) to use in order to decode 524 // the instructions. A conflict of instructions may occur, in which case we 525 // dump the conflict set to the standard error. 526 void doFilter(); 527 528 // Emits code to decode our share of instructions. Returns true if the 529 // emitted code causes a return, which occurs if we know how to decode 530 // the instruction at this level or the instruction is not decodeable. 531 bool emit(raw_ostream &o, unsigned &Indentation); 532 }; 533 534 /////////////////////////// 535 // // 536 // Filter Implmenetation // 537 // // 538 /////////////////////////// 539 540 ARMFilter::ARMFilter(const ARMFilter &f) : 541 Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), 542 FilteredInstructions(f.FilteredInstructions), 543 VariableInstructions(f.VariableInstructions), 544 FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered), 545 LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) { 546 } 547 548 ARMFilter::ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits, 549 bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits), 550 Mixed(mixed) { 551 assert(StartBit + NumBits - 1 < BIT_WIDTH); 552 553 NumFiltered = 0; 554 LastOpcFiltered = 0; 555 NumVariable = 0; 556 557 for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) { 558 insn_t Insn; 559 560 // Populates the insn given the uid. 561 Owner->insnWithID(Insn, Owner->Opcodes[i]); 562 563 uint64_t Field; 564 // Scans the segment for possibly well-specified encoding bits. 565 bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits); 566 567 if (ok) { 568 // The encoding bits are well-known. Lets add the uid of the 569 // instruction into the bucket keyed off the constant field value. 570 LastOpcFiltered = Owner->Opcodes[i]; 571 FilteredInstructions[Field].push_back(LastOpcFiltered); 572 ++NumFiltered; 573 } else { 574 // Some of the encoding bit(s) are unspecfied. This contributes to 575 // one additional member of "Variable" instructions. 576 VariableInstructions.push_back(Owner->Opcodes[i]); 577 ++NumVariable; 578 } 579 } 580 581 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) 582 && "Filter returns no instruction categories"); 583 } 584 585 ARMFilter::~ARMFilter() { 586 std::map<unsigned, ARMFilterChooser*>::iterator filterIterator; 587 for (filterIterator = FilterChooserMap.begin(); 588 filterIterator != FilterChooserMap.end(); 589 filterIterator++) { 590 delete filterIterator->second; 591 } 592 } 593 594 // Divides the decoding task into sub tasks and delegates them to the 595 // inferior FilterChooser's. 596 // 597 // A special case arises when there's only one entry in the filtered 598 // instructions. In order to unambiguously decode the singleton, we need to 599 // match the remaining undecoded encoding bits against the singleton. 600 void ARMFilter::recurse() { 601 std::map<uint64_t, std::vector<unsigned> >::const_iterator mapIterator; 602 603 bit_value_t BitValueArray[BIT_WIDTH]; 604 // Starts by inheriting our parent filter chooser's filter bit values. 605 memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray)); 606 607 unsigned bitIndex; 608 609 if (VariableInstructions.size()) { 610 // Conservatively marks each segment position as BIT_UNSET. 611 for (bitIndex = 0; bitIndex < NumBits; bitIndex++) 612 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 613 614 // Delegates to an inferior filter chooser for further processing on this 615 // group of instructions whose segment values are variable. 616 FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>( 617 (unsigned)-1, 618 new ARMFilterChooser(Owner->AllInstructions, 619 VariableInstructions, 620 BitValueArray, 621 *Owner) 622 )); 623 } 624 625 // No need to recurse for a singleton filtered instruction. 626 // See also Filter::emit(). 627 if (getNumFiltered() == 1) { 628 //Owner->SingletonExists(LastOpcFiltered); 629 assert(FilterChooserMap.size() == 1); 630 return; 631 } 632 633 // Otherwise, create sub choosers. 634 for (mapIterator = FilteredInstructions.begin(); 635 mapIterator != FilteredInstructions.end(); 636 mapIterator++) { 637 638 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 639 for (bitIndex = 0; bitIndex < NumBits; bitIndex++) { 640 if (mapIterator->first & (1ULL << bitIndex)) 641 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 642 else 643 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 644 } 645 646 // Delegates to an inferior filter chooser for further processing on this 647 // category of instructions. 648 FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>( 649 mapIterator->first, 650 new ARMFilterChooser(Owner->AllInstructions, 651 mapIterator->second, 652 BitValueArray, 653 *Owner) 654 )); 655 } 656 } 657 658 // Emit code to decode instructions given a segment or segments of bits. 659 void ARMFilter::emit(raw_ostream &o, unsigned &Indentation) { 660 o.indent(Indentation) << "// Check Inst{"; 661 662 if (NumBits > 1) 663 o << (StartBit + NumBits - 1) << '-'; 664 665 o << StartBit << "} ...\n"; 666 667 o.indent(Indentation) << "switch (fieldFromInstruction(insn, " 668 << StartBit << ", " << NumBits << ")) {\n"; 669 670 std::map<unsigned, ARMFilterChooser*>::iterator filterIterator; 671 672 bool DefaultCase = false; 673 for (filterIterator = FilterChooserMap.begin(); 674 filterIterator != FilterChooserMap.end(); 675 filterIterator++) { 676 677 // Field value -1 implies a non-empty set of variable instructions. 678 // See also recurse(). 679 if (filterIterator->first == (unsigned)-1) { 680 DefaultCase = true; 681 682 o.indent(Indentation) << "default:\n"; 683 o.indent(Indentation) << " break; // fallthrough\n"; 684 685 // Closing curly brace for the switch statement. 686 // This is unconventional because we want the default processing to be 687 // performed for the fallthrough cases as well, i.e., when the "cases" 688 // did not prove a decoded instruction. 689 o.indent(Indentation) << "}\n"; 690 691 } else 692 o.indent(Indentation) << "case " << filterIterator->first << ":\n"; 693 694 // We arrive at a category of instructions with the same segment value. 695 // Now delegate to the sub filter chooser for further decodings. 696 // The case may fallthrough, which happens if the remaining well-known 697 // encoding bits do not match exactly. 698 if (!DefaultCase) { ++Indentation; ++Indentation; } 699 700 bool finished = filterIterator->second->emit(o, Indentation); 701 // For top level default case, there's no need for a break statement. 702 if (Owner->isTopLevel() && DefaultCase) 703 break; 704 if (!finished) 705 o.indent(Indentation) << "break;\n"; 706 707 if (!DefaultCase) { --Indentation; --Indentation; } 708 } 709 710 // If there is no default case, we still need to supply a closing brace. 711 if (!DefaultCase) { 712 // Closing curly brace for the switch statement. 713 o.indent(Indentation) << "}\n"; 714 } 715 } 716 717 // Returns the number of fanout produced by the filter. More fanout implies 718 // the filter distinguishes more categories of instructions. 719 unsigned ARMFilter::usefulness() const { 720 if (VariableInstructions.size()) 721 return FilteredInstructions.size(); 722 else 723 return FilteredInstructions.size() + 1; 724 } 725 726 ////////////////////////////////// 727 // // 728 // Filterchooser Implementation // 729 // // 730 ////////////////////////////////// 731 732 // Define the symbol here. 733 TARGET_NAME_t ARMFilterChooser::TargetName; 734 735 // This provides an opportunity for target specific code emission. 736 void ARMFilterChooser::emitTopHook(raw_ostream &o) { 737 if (TargetName == TARGET_ARM) { 738 // Emit code that references the ARMFormat data type. 739 o << "static const ARMFormat ARMFormats[] = {\n"; 740 for (unsigned i = 0, e = AllInstructions.size(); i != e; ++i) { 741 const Record &Def = *(AllInstructions[i]->TheDef); 742 const std::string &Name = Def.getName(); 743 if (Def.isSubClassOf("InstARM") || Def.isSubClassOf("InstThumb")) 744 o.indent(2) << 745 stringForARMFormat((ARMFormat)getByteField(Def, "Form")); 746 else 747 o << " ARM_FORMAT_NA"; 748 749 o << ",\t// Inst #" << i << " = " << Name << '\n'; 750 } 751 o << " ARM_FORMAT_NA\t// Unreachable.\n"; 752 o << "};\n\n"; 753 } 754 } 755 756 // Emit the top level typedef and decodeInstruction() function. 757 void ARMFilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) { 758 // Run the target specific emit hook. 759 emitTopHook(o); 760 761 switch (BIT_WIDTH) { 762 case 8: 763 o.indent(Indentation) << "typedef uint8_t field_t;\n"; 764 break; 765 case 16: 766 o.indent(Indentation) << "typedef uint16_t field_t;\n"; 767 break; 768 case 32: 769 o.indent(Indentation) << "typedef uint32_t field_t;\n"; 770 break; 771 case 64: 772 o.indent(Indentation) << "typedef uint64_t field_t;\n"; 773 break; 774 default: 775 assert(0 && "Unexpected instruction size!"); 776 } 777 778 o << '\n'; 779 780 o.indent(Indentation) << "static field_t " << 781 "fieldFromInstruction(field_t insn, unsigned startBit, unsigned numBits)\n"; 782 783 o.indent(Indentation) << "{\n"; 784 785 ++Indentation; ++Indentation; 786 o.indent(Indentation) << "assert(startBit + numBits <= " << BIT_WIDTH 787 << " && \"Instruction field out of bounds!\");\n"; 788 o << '\n'; 789 o.indent(Indentation) << "field_t fieldMask;\n"; 790 o << '\n'; 791 o.indent(Indentation) << "if (numBits == " << BIT_WIDTH << ")\n"; 792 793 ++Indentation; ++Indentation; 794 o.indent(Indentation) << "fieldMask = (field_t)-1;\n"; 795 --Indentation; --Indentation; 796 797 o.indent(Indentation) << "else\n"; 798 799 ++Indentation; ++Indentation; 800 o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n"; 801 --Indentation; --Indentation; 802 803 o << '\n'; 804 o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n"; 805 --Indentation; --Indentation; 806 807 o.indent(Indentation) << "}\n"; 808 809 o << '\n'; 810 811 o.indent(Indentation) <<"static uint16_t decodeInstruction(field_t insn) {\n"; 812 813 ++Indentation; ++Indentation; 814 // Emits code to decode the instructions. 815 emit(o, Indentation); 816 817 o << '\n'; 818 o.indent(Indentation) << "return 0;\n"; 819 --Indentation; --Indentation; 820 821 o.indent(Indentation) << "}\n"; 822 823 o << '\n'; 824 } 825 826 // This provides an opportunity for target specific code emission after 827 // emitTop(). 828 void ARMFilterChooser::emitBot(raw_ostream &o, unsigned &Indentation) { 829 if (TargetName != TARGET_THUMB) return; 830 831 // Emit code that decodes the Thumb ISA. 832 o.indent(Indentation) 833 << "static uint16_t decodeThumbInstruction(field_t insn) {\n"; 834 835 ++Indentation; ++Indentation; 836 837 // Emits code to decode the instructions. 838 emit(o, Indentation); 839 840 o << '\n'; 841 o.indent(Indentation) << "return 0;\n"; 842 843 --Indentation; --Indentation; 844 845 o.indent(Indentation) << "}\n"; 846 } 847 848 // Populates the field of the insn given the start position and the number of 849 // consecutive bits to scan for. 850 // 851 // Returns false if and on the first uninitialized bit value encountered. 852 // Returns true, otherwise. 853 bool ARMFilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, 854 unsigned StartBit, unsigned NumBits) const { 855 Field = 0; 856 857 for (unsigned i = 0; i < NumBits; ++i) { 858 if (Insn[StartBit + i] == BIT_UNSET) 859 return false; 860 861 if (Insn[StartBit + i] == BIT_TRUE) 862 Field = Field | (1ULL << i); 863 } 864 865 return true; 866 } 867 868 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 869 /// filter array as a series of chars. 870 void ARMFilterChooser::dumpFilterArray(raw_ostream &o, 871 bit_value_t (&filter)[BIT_WIDTH]) { 872 unsigned bitIndex; 873 874 for (bitIndex = BIT_WIDTH; bitIndex > 0; bitIndex--) { 875 switch (filter[bitIndex - 1]) { 876 case BIT_UNFILTERED: 877 o << "."; 878 break; 879 case BIT_UNSET: 880 o << "_"; 881 break; 882 case BIT_TRUE: 883 o << "1"; 884 break; 885 case BIT_FALSE: 886 o << "0"; 887 break; 888 } 889 } 890 } 891 892 /// dumpStack - dumpStack traverses the filter chooser chain and calls 893 /// dumpFilterArray on each filter chooser up to the top level one. 894 void ARMFilterChooser::dumpStack(raw_ostream &o, const char *prefix) { 895 ARMFilterChooser *current = this; 896 897 while (current) { 898 o << prefix; 899 dumpFilterArray(o, current->FilterBitValues); 900 o << '\n'; 901 current = current->Parent; 902 } 903 } 904 905 // Called from Filter::recurse() when singleton exists. For debug purpose. 906 void ARMFilterChooser::SingletonExists(unsigned Opc) { 907 insn_t Insn0; 908 insnWithID(Insn0, Opc); 909 910 errs() << "Singleton exists: " << nameWithID(Opc) 911 << " with its decoding dominating "; 912 for (unsigned i = 0; i < Opcodes.size(); ++i) { 913 if (Opcodes[i] == Opc) continue; 914 errs() << nameWithID(Opcodes[i]) << ' '; 915 } 916 errs() << '\n'; 917 918 dumpStack(errs(), "\t\t"); 919 for (unsigned i = 0; i < Opcodes.size(); i++) { 920 const std::string &Name = nameWithID(Opcodes[i]); 921 922 errs() << '\t' << Name << " "; 923 dumpBits(errs(), 924 getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst")); 925 errs() << '\n'; 926 } 927 } 928 929 // Calculates the island(s) needed to decode the instruction. 930 // This returns a list of undecoded bits of an instructions, for example, 931 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 932 // decoded bits in order to verify that the instruction matches the Opcode. 933 unsigned ARMFilterChooser::getIslands(std::vector<unsigned> &StartBits, 934 std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals, 935 insn_t &Insn) { 936 unsigned Num, BitNo; 937 Num = BitNo = 0; 938 939 uint64_t FieldVal = 0; 940 941 // 0: Init 942 // 1: Water (the bit value does not affect decoding) 943 // 2: Island (well-known bit value needed for decoding) 944 int State = 0; 945 int Val = -1; 946 947 for (unsigned i = 0; i < BIT_WIDTH; ++i) { 948 Val = Value(Insn[i]); 949 bool Filtered = PositionFiltered(i); 950 switch (State) { 951 default: 952 assert(0 && "Unreachable code!"); 953 break; 954 case 0: 955 case 1: 956 if (Filtered || Val == -1) 957 State = 1; // Still in Water 958 else { 959 State = 2; // Into the Island 960 BitNo = 0; 961 StartBits.push_back(i); 962 FieldVal = Val; 963 } 964 break; 965 case 2: 966 if (Filtered || Val == -1) { 967 State = 1; // Into the Water 968 EndBits.push_back(i - 1); 969 FieldVals.push_back(FieldVal); 970 ++Num; 971 } else { 972 State = 2; // Still in Island 973 ++BitNo; 974 FieldVal = FieldVal | Val << BitNo; 975 } 976 break; 977 } 978 } 979 // If we are still in Island after the loop, do some housekeeping. 980 if (State == 2) { 981 EndBits.push_back(BIT_WIDTH - 1); 982 FieldVals.push_back(FieldVal); 983 ++Num; 984 } 985 986 assert(StartBits.size() == Num && EndBits.size() == Num && 987 FieldVals.size() == Num); 988 return Num; 989 } 990 991 // Emits code to decode the singleton. Return true if we have matched all the 992 // well-known bits. 993 bool ARMFilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, 994 unsigned Opc) { 995 std::vector<unsigned> StartBits; 996 std::vector<unsigned> EndBits; 997 std::vector<uint64_t> FieldVals; 998 insn_t Insn; 999 insnWithID(Insn, Opc); 1000 1001 // This provides a good opportunity to check for possible Ld/St Coprocessor 1002 // Opcode and escapes if the coproc # is either 10 or 11. It is a NEON/VFP 1003 // instruction is disguise. 1004 if (TargetName == TARGET_ARM && LdStCopEncoding1(Opc)) { 1005 o.indent(Indentation); 1006 // A8.6.51 & A8.6.188 1007 // If coproc = 0b101?, i.e, slice(insn, 11, 8) = 10 or 11, escape. 1008 o << "if (fieldFromInstruction(insn, 9, 3) == 5) break; // fallthrough\n"; 1009 } 1010 1011 // Look for islands of undecoded bits of the singleton. 1012 getIslands(StartBits, EndBits, FieldVals, Insn); 1013 1014 unsigned Size = StartBits.size(); 1015 unsigned I, NumBits; 1016 1017 // If we have matched all the well-known bits, just issue a return. 1018 if (Size == 0) { 1019 o.indent(Indentation) << "return " << Opc << "; // " << nameWithID(Opc) 1020 << '\n'; 1021 return true; 1022 } 1023 1024 // Otherwise, there are more decodings to be done! 1025 1026 // Emit code to match the island(s) for the singleton. 1027 o.indent(Indentation) << "// Check "; 1028 1029 for (I = Size; I != 0; --I) { 1030 o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} "; 1031 if (I > 1) 1032 o << "&& "; 1033 else 1034 o << "for singleton decoding...\n"; 1035 } 1036 1037 o.indent(Indentation) << "if ("; 1038 1039 for (I = Size; I != 0; --I) { 1040 NumBits = EndBits[I-1] - StartBits[I-1] + 1; 1041 o << "fieldFromInstruction(insn, " << StartBits[I-1] << ", " << NumBits 1042 << ") == " << FieldVals[I-1]; 1043 if (I > 1) 1044 o << " && "; 1045 else 1046 o << ")\n"; 1047 } 1048 1049 o.indent(Indentation) << " return " << Opc << "; // " << nameWithID(Opc) 1050 << '\n'; 1051 1052 return false; 1053 } 1054 1055 // Emits code to decode the singleton, and then to decode the rest. 1056 void ARMFilterChooser::emitSingletonDecoder(raw_ostream &o, 1057 unsigned &Indentation, 1058 ARMFilter &Best) { 1059 1060 unsigned Opc = Best.getSingletonOpc(); 1061 1062 emitSingletonDecoder(o, Indentation, Opc); 1063 1064 // Emit code for the rest. 1065 o.indent(Indentation) << "else\n"; 1066 1067 Indentation += 2; 1068 Best.getVariableFC().emit(o, Indentation); 1069 Indentation -= 2; 1070 } 1071 1072 // Assign a single filter and run with it. Top level API client can initialize 1073 // with a single filter to start the filtering process. 1074 void ARMFilterChooser::runSingleFilter(ARMFilterChooser &owner, 1075 unsigned startBit, 1076 unsigned numBit, bool mixed) { 1077 Filters.clear(); 1078 ARMFilter F(*this, startBit, numBit, true); 1079 Filters.push_back(F); 1080 BestIndex = 0; // Sole Filter instance to choose from. 1081 bestFilter().recurse(); 1082 } 1083 1084 // reportRegion is a helper function for filterProcessor to mark a region as 1085 // eligible for use as a filter region. 1086 void ARMFilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, 1087 unsigned BitIndex, bool AllowMixed) { 1088 if (RA == ATTR_MIXED && AllowMixed) 1089 Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, true)); 1090 else if (RA == ATTR_ALL_SET && !AllowMixed) 1091 Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, false)); 1092 } 1093 1094 // FilterProcessor scans the well-known encoding bits of the instructions and 1095 // builds up a list of candidate filters. It chooses the best filter and 1096 // recursively descends down the decoding tree. 1097 bool ARMFilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { 1098 Filters.clear(); 1099 BestIndex = -1; 1100 unsigned numInstructions = Opcodes.size(); 1101 1102 assert(numInstructions && "Filter created with no instructions"); 1103 1104 // No further filtering is necessary. 1105 if (numInstructions == 1) 1106 return true; 1107 1108 // Heuristics. See also doFilter()'s "Heuristics" comment when num of 1109 // instructions is 3. 1110 if (AllowMixed && !Greedy) { 1111 assert(numInstructions == 3); 1112 1113 for (unsigned i = 0; i < Opcodes.size(); ++i) { 1114 std::vector<unsigned> StartBits; 1115 std::vector<unsigned> EndBits; 1116 std::vector<uint64_t> FieldVals; 1117 insn_t Insn; 1118 1119 insnWithID(Insn, Opcodes[i]); 1120 1121 // Look for islands of undecoded bits of any instruction. 1122 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { 1123 // Found an instruction with island(s). Now just assign a filter. 1124 runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1, 1125 true); 1126 return true; 1127 } 1128 } 1129 } 1130 1131 unsigned BitIndex, InsnIndex; 1132 1133 // We maintain BIT_WIDTH copies of the bitAttrs automaton. 1134 // The automaton consumes the corresponding bit from each 1135 // instruction. 1136 // 1137 // Input symbols: 0, 1, and _ (unset). 1138 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. 1139 // Initial state: NONE. 1140 // 1141 // (NONE) ------- [01] -> (ALL_SET) 1142 // (NONE) ------- _ ----> (ALL_UNSET) 1143 // (ALL_SET) ---- [01] -> (ALL_SET) 1144 // (ALL_SET) ---- _ ----> (MIXED) 1145 // (ALL_UNSET) -- [01] -> (MIXED) 1146 // (ALL_UNSET) -- _ ----> (ALL_UNSET) 1147 // (MIXED) ------ . ----> (MIXED) 1148 // (FILTERED)---- . ----> (FILTERED) 1149 1150 bitAttr_t bitAttrs[BIT_WIDTH]; 1151 1152 // FILTERED bit positions provide no entropy and are not worthy of pursuing. 1153 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. 1154 for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex) 1155 if (FilterBitValues[BitIndex] == BIT_TRUE || 1156 FilterBitValues[BitIndex] == BIT_FALSE) 1157 bitAttrs[BitIndex] = ATTR_FILTERED; 1158 else 1159 bitAttrs[BitIndex] = ATTR_NONE; 1160 1161 for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { 1162 insn_t insn; 1163 1164 insnWithID(insn, Opcodes[InsnIndex]); 1165 1166 for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex) { 1167 switch (bitAttrs[BitIndex]) { 1168 case ATTR_NONE: 1169 if (insn[BitIndex] == BIT_UNSET) 1170 bitAttrs[BitIndex] = ATTR_ALL_UNSET; 1171 else 1172 bitAttrs[BitIndex] = ATTR_ALL_SET; 1173 break; 1174 case ATTR_ALL_SET: 1175 if (insn[BitIndex] == BIT_UNSET) 1176 bitAttrs[BitIndex] = ATTR_MIXED; 1177 break; 1178 case ATTR_ALL_UNSET: 1179 if (insn[BitIndex] != BIT_UNSET) 1180 bitAttrs[BitIndex] = ATTR_MIXED; 1181 break; 1182 case ATTR_MIXED: 1183 case ATTR_FILTERED: 1184 break; 1185 } 1186 } 1187 } 1188 1189 // The regionAttr automaton consumes the bitAttrs automatons' state, 1190 // lowest-to-highest. 1191 // 1192 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) 1193 // States: NONE, ALL_SET, MIXED 1194 // Initial state: NONE 1195 // 1196 // (NONE) ----- F --> (NONE) 1197 // (NONE) ----- S --> (ALL_SET) ; and set region start 1198 // (NONE) ----- U --> (NONE) 1199 // (NONE) ----- M --> (MIXED) ; and set region start 1200 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region 1201 // (ALL_SET) -- S --> (ALL_SET) 1202 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region 1203 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region 1204 // (MIXED) ---- F --> (NONE) ; and report a MIXED region 1205 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region 1206 // (MIXED) ---- U --> (NONE) ; and report a MIXED region 1207 // (MIXED) ---- M --> (MIXED) 1208 1209 bitAttr_t RA = ATTR_NONE; 1210 unsigned StartBit = 0; 1211 1212 for (BitIndex = 0; BitIndex < BIT_WIDTH; BitIndex++) { 1213 bitAttr_t bitAttr = bitAttrs[BitIndex]; 1214 1215 assert(bitAttr != ATTR_NONE && "Bit without attributes"); 1216 1217 switch (RA) { 1218 case ATTR_NONE: 1219 switch (bitAttr) { 1220 case ATTR_FILTERED: 1221 break; 1222 case ATTR_ALL_SET: 1223 StartBit = BitIndex; 1224 RA = ATTR_ALL_SET; 1225 break; 1226 case ATTR_ALL_UNSET: 1227 break; 1228 case ATTR_MIXED: 1229 StartBit = BitIndex; 1230 RA = ATTR_MIXED; 1231 break; 1232 default: 1233 assert(0 && "Unexpected bitAttr!"); 1234 } 1235 break; 1236 case ATTR_ALL_SET: 1237 switch (bitAttr) { 1238 case ATTR_FILTERED: 1239 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1240 RA = ATTR_NONE; 1241 break; 1242 case ATTR_ALL_SET: 1243 break; 1244 case ATTR_ALL_UNSET: 1245 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1246 RA = ATTR_NONE; 1247 break; 1248 case ATTR_MIXED: 1249 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1250 StartBit = BitIndex; 1251 RA = ATTR_MIXED; 1252 break; 1253 default: 1254 assert(0 && "Unexpected bitAttr!"); 1255 } 1256 break; 1257 case ATTR_MIXED: 1258 switch (bitAttr) { 1259 case ATTR_FILTERED: 1260 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1261 StartBit = BitIndex; 1262 RA = ATTR_NONE; 1263 break; 1264 case ATTR_ALL_SET: 1265 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1266 StartBit = BitIndex; 1267 RA = ATTR_ALL_SET; 1268 break; 1269 case ATTR_ALL_UNSET: 1270 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1271 RA = ATTR_NONE; 1272 break; 1273 case ATTR_MIXED: 1274 break; 1275 default: 1276 assert(0 && "Unexpected bitAttr!"); 1277 } 1278 break; 1279 case ATTR_ALL_UNSET: 1280 assert(0 && "regionAttr state machine has no ATTR_UNSET state"); 1281 case ATTR_FILTERED: 1282 assert(0 && "regionAttr state machine has no ATTR_FILTERED state"); 1283 } 1284 } 1285 1286 // At the end, if we're still in ALL_SET or MIXED states, report a region 1287 switch (RA) { 1288 case ATTR_NONE: 1289 break; 1290 case ATTR_FILTERED: 1291 break; 1292 case ATTR_ALL_SET: 1293 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1294 break; 1295 case ATTR_ALL_UNSET: 1296 break; 1297 case ATTR_MIXED: 1298 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1299 break; 1300 } 1301 1302 // We have finished with the filter processings. Now it's time to choose 1303 // the best performing filter. 1304 BestIndex = 0; 1305 bool AllUseless = true; 1306 unsigned BestScore = 0; 1307 1308 for (unsigned i = 0, e = Filters.size(); i != e; ++i) { 1309 unsigned Usefulness = Filters[i].usefulness(); 1310 1311 if (Usefulness) 1312 AllUseless = false; 1313 1314 if (Usefulness > BestScore) { 1315 BestIndex = i; 1316 BestScore = Usefulness; 1317 } 1318 } 1319 1320 if (!AllUseless) 1321 bestFilter().recurse(); 1322 1323 return !AllUseless; 1324 } // end of FilterChooser::filterProcessor(bool) 1325 1326 // Decides on the best configuration of filter(s) to use in order to decode 1327 // the instructions. A conflict of instructions may occur, in which case we 1328 // dump the conflict set to the standard error. 1329 void ARMFilterChooser::doFilter() { 1330 unsigned Num = Opcodes.size(); 1331 assert(Num && "FilterChooser created with no instructions"); 1332 1333 // Heuristics: Use Inst{31-28} as the top level filter for ARM ISA. 1334 if (TargetName == TARGET_ARM && Parent == NULL) { 1335 runSingleFilter(*this, 28, 4, false); 1336 return; 1337 } 1338 1339 // Try regions of consecutive known bit values first. 1340 if (filterProcessor(false)) 1341 return; 1342 1343 // Then regions of mixed bits (both known and unitialized bit values allowed). 1344 if (filterProcessor(true)) 1345 return; 1346 1347 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where 1348 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a 1349 // well-known encoding pattern. In such case, we backtrack and scan for the 1350 // the very first consecutive ATTR_ALL_SET region and assign a filter to it. 1351 if (Num == 3 && filterProcessor(true, false)) 1352 return; 1353 1354 // If we come to here, the instruction decoding has failed. 1355 // Set the BestIndex to -1 to indicate so. 1356 BestIndex = -1; 1357 } 1358 1359 // Emits code to decode our share of instructions. Returns true if the 1360 // emitted code causes a return, which occurs if we know how to decode 1361 // the instruction at this level or the instruction is not decodeable. 1362 bool ARMFilterChooser::emit(raw_ostream &o, unsigned &Indentation) { 1363 if (Opcodes.size() == 1) 1364 // There is only one instruction in the set, which is great! 1365 // Call emitSingletonDecoder() to see whether there are any remaining 1366 // encodings bits. 1367 return emitSingletonDecoder(o, Indentation, Opcodes[0]); 1368 1369 // Choose the best filter to do the decodings! 1370 if (BestIndex != -1) { 1371 ARMFilter &Best = bestFilter(); 1372 if (Best.getNumFiltered() == 1) 1373 emitSingletonDecoder(o, Indentation, Best); 1374 else 1375 bestFilter().emit(o, Indentation); 1376 return false; 1377 } 1378 1379 // If we reach here, there is a conflict in decoding. Let's resolve the known 1380 // conflicts! 1381 if ((TargetName == TARGET_ARM || TargetName == TARGET_THUMB) && 1382 Opcodes.size() == 2) { 1383 // Resolve the known conflict sets: 1384 // 1385 // 1. source registers are identical => VMOVDneon; otherwise => VORRd 1386 // 2. source registers are identical => VMOVQ; otherwise => VORRq 1387 // 3. LDR, LDRcp => return LDR for now. 1388 // FIXME: How can we distinguish between LDR and LDRcp? Do we need to? 1389 // 4. tLDMIA, tLDMIA_UPD => Rn = Inst{10-8}, reglist = Inst{7-0}, 1390 // wback = registers<Rn> = 0 1391 // NOTE: (tLDM, tLDM_UPD) resolution must come before Advanced SIMD 1392 // addressing mode resolution!!! 1393 // 5. VLD[234]LN*/VST[234]LN* vs. VLD[234]LN*_UPD/VST[234]LN*_UPD conflicts 1394 // are resolved returning the non-UPD versions of the instructions if the 1395 // Rm field, i.e., Inst{3-0} is 0b1111. This is specified in A7.7.1 1396 // Advanced SIMD addressing mode. 1397 const std::string &name1 = nameWithID(Opcodes[0]); 1398 const std::string &name2 = nameWithID(Opcodes[1]); 1399 if ((name1 == "VMOVDneon" && name2 == "VORRd") || 1400 (name1 == "VMOVQ" && name2 == "VORRq")) { 1401 // Inserting the opening curly brace for this case block. 1402 --Indentation; --Indentation; 1403 o.indent(Indentation) << "{\n"; 1404 ++Indentation; ++Indentation; 1405 1406 o.indent(Indentation) 1407 << "field_t N = fieldFromInstruction(insn, 7, 1), " 1408 << "M = fieldFromInstruction(insn, 5, 1);\n"; 1409 o.indent(Indentation) 1410 << "field_t Vn = fieldFromInstruction(insn, 16, 4), " 1411 << "Vm = fieldFromInstruction(insn, 0, 4);\n"; 1412 o.indent(Indentation) 1413 << "return (N == M && Vn == Vm) ? " 1414 << Opcodes[0] << " /* " << name1 << " */ : " 1415 << Opcodes[1] << " /* " << name2 << " */ ;\n"; 1416 1417 // Inserting the closing curly brace for this case block. 1418 --Indentation; --Indentation; 1419 o.indent(Indentation) << "}\n"; 1420 ++Indentation; ++Indentation; 1421 1422 return true; 1423 } 1424 if (name1 == "LDR" && name2 == "LDRcp") { 1425 o.indent(Indentation) 1426 << "return " << Opcodes[0] 1427 << "; // Returning LDR for {LDR, LDRcp}\n"; 1428 return true; 1429 } 1430 if (name1 == "tLDMIA" && name2 == "tLDMIA_UPD") { 1431 // Inserting the opening curly brace for this case block. 1432 --Indentation; --Indentation; 1433 o.indent(Indentation) << "{\n"; 1434 ++Indentation; ++Indentation; 1435 1436 o.indent(Indentation) 1437 << "unsigned Rn = fieldFromInstruction(insn, 8, 3), " 1438 << "list = fieldFromInstruction(insn, 0, 8);\n"; 1439 o.indent(Indentation) 1440 << "return ((list >> Rn) & 1) == 0 ? " 1441 << Opcodes[1] << " /* " << name2 << " */ : " 1442 << Opcodes[0] << " /* " << name1 << " */ ;\n"; 1443 1444 // Inserting the closing curly brace for this case block. 1445 --Indentation; --Indentation; 1446 o.indent(Indentation) << "}\n"; 1447 ++Indentation; ++Indentation; 1448 1449 return true; 1450 } 1451 if (sameStringExceptSuffix(name1, name2, "_UPD")) { 1452 o.indent(Indentation) 1453 << "return fieldFromInstruction(insn, 0, 4) == 15 ? " << Opcodes[0] 1454 << " /* " << name1 << " */ : " << Opcodes[1] << "/* " << name2 1455 << " */ ; // Advanced SIMD addressing mode\n"; 1456 return true; 1457 } 1458 1459 // Otherwise, it does not belong to the known conflict sets. 1460 } 1461 1462 // We don't know how to decode these instructions! Return 0 and dump the 1463 // conflict set! 1464 o.indent(Indentation) << "return 0;" << " // Conflict set: "; 1465 for (int i = 0, N = Opcodes.size(); i < N; ++i) { 1466 o << nameWithID(Opcodes[i]); 1467 if (i < (N - 1)) 1468 o << ", "; 1469 else 1470 o << '\n'; 1471 } 1472 1473 // Print out useful conflict information for postmortem analysis. 1474 errs() << "Decoding Conflict:\n"; 1475 1476 dumpStack(errs(), "\t\t"); 1477 1478 for (unsigned i = 0; i < Opcodes.size(); i++) { 1479 const std::string &Name = nameWithID(Opcodes[i]); 1480 1481 errs() << '\t' << Name << " "; 1482 dumpBits(errs(), 1483 getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst")); 1484 errs() << '\n'; 1485 } 1486 1487 return true; 1488 } 1489 1490 1491 //////////////////////////////////////////// 1492 // // 1493 // ARMDEBackend // 1494 // (Helper class for ARMDecoderEmitter) // 1495 // // 1496 //////////////////////////////////////////// 1497 1498 class ARMDecoderEmitter::ARMDEBackend { 1499 public: 1500 ARMDEBackend(ARMDecoderEmitter &frontend, RecordKeeper &Records) : 1501 NumberedInstructions(), 1502 Opcodes(), 1503 Frontend(frontend), 1504 Target(Records), 1505 FC(NULL) 1506 { 1507 if (Target.getName() == "ARM") 1508 TargetName = TARGET_ARM; 1509 else { 1510 errs() << "Target name " << Target.getName() << " not recognized\n"; 1511 assert(0 && "Unknown target"); 1512 } 1513 1514 // Populate the instructions for our TargetName. 1515 populateInstructions(); 1516 } 1517 1518 ~ARMDEBackend() { 1519 if (FC) { 1520 delete FC; 1521 FC = NULL; 1522 } 1523 } 1524 1525 void getInstructionsByEnumValue(std::vector<const CodeGenInstruction*> 1526 &NumberedInstructions) { 1527 // We must emit the PHI opcode first... 1528 std::string Namespace = Target.getInstNamespace(); 1529 assert(!Namespace.empty() && "No instructions defined."); 1530 1531 NumberedInstructions = Target.getInstructionsByEnumValue(); 1532 } 1533 1534 bool populateInstruction(const CodeGenInstruction &CGI, TARGET_NAME_t TN); 1535 1536 void populateInstructions(); 1537 1538 // Emits disassembler code for instruction decoding. This delegates to the 1539 // FilterChooser instance to do the heavy lifting. 1540 void emit(raw_ostream &o); 1541 1542 protected: 1543 std::vector<const CodeGenInstruction*> NumberedInstructions; 1544 std::vector<unsigned> Opcodes; 1545 // Special case for the ARM chip, which supports ARM and Thumb ISAs. 1546 // Opcodes2 will be populated with the Thumb opcodes. 1547 std::vector<unsigned> Opcodes2; 1548 ARMDecoderEmitter &Frontend; 1549 CodeGenTarget Target; 1550 ARMFilterChooser *FC; 1551 1552 TARGET_NAME_t TargetName; 1553 }; 1554 1555 bool ARMDecoderEmitter:: 1556 ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI, 1557 TARGET_NAME_t TN) { 1558 const Record &Def = *CGI.TheDef; 1559 const StringRef Name = Def.getName(); 1560 uint8_t Form = getByteField(Def, "Form"); 1561 1562 BitsInit &Bits = getBitsField(Def, "Inst"); 1563 1564 // If all the bit positions are not specified; do not decode this instruction. 1565 // We are bound to fail! For proper disassembly, the well-known encoding bits 1566 // of the instruction must be fully specified. 1567 // 1568 // This also removes pseudo instructions from considerations of disassembly, 1569 // which is a better design and less fragile than the name matchings. 1570 if (Bits.allInComplete()) return false; 1571 1572 // Ignore "asm parser only" instructions. 1573 if (Def.getValueAsBit("isAsmParserOnly")) 1574 return false; 1575 1576 if (TN == TARGET_ARM) { 1577 if (Form == ARM_FORMAT_PSEUDO) 1578 return false; 1579 if (thumbInstruction(Form)) 1580 return false; 1581 1582 // Tail calls are other patterns that generate existing instructions. 1583 if (Name == "TCRETURNdi" || Name == "TCRETURNdiND" || 1584 Name == "TCRETURNri" || Name == "TCRETURNriND" || 1585 Name == "TAILJMPd" || Name == "TAILJMPdt" || 1586 Name == "TAILJMPdND" || Name == "TAILJMPdNDt" || 1587 Name == "TAILJMPr" || Name == "TAILJMPrND" || 1588 Name == "MOVr_TC") 1589 return false; 1590 1591 // Delegate ADR disassembly to the more generic ADDri/SUBri instructions. 1592 if (Name == "ADR") 1593 return false; 1594 1595 // 1596 // The following special cases are for conflict resolutions. 1597 // 1598 1599 // A8-598: VEXT 1600 // Vector Extract extracts elements from the bottom end of the second 1601 // operand vector and the top end of the first, concatenates them and 1602 // places the result in the destination vector. The elements of the 1603 // vectors are treated as being 8-bit bitfields. There is no distinction 1604 // between data types. The size of the operation can be specified in 1605 // assembler as vext.size. If the value is 16, 32, or 64, the syntax is 1606 // a pseudo-instruction for a VEXT instruction specifying the equivalent 1607 // number of bytes. 1608 // 1609 // Variants VEXTd16, VEXTd32, VEXTd8, and VEXTdf are reduced to VEXTd8; 1610 // variants VEXTq16, VEXTq32, VEXTq8, and VEXTqf are reduced to VEXTq8. 1611 if (Name == "VEXTd16" || Name == "VEXTd32" || Name == "VEXTdf" || 1612 Name == "VEXTq16" || Name == "VEXTq32" || Name == "VEXTqf") 1613 return false; 1614 } else if (TN == TARGET_THUMB) { 1615 if (!thumbInstruction(Form)) 1616 return false; 1617 1618 // A8.6.25 BX. Use the generic tBX_Rm, ignore tBX_RET and tBX_RET_vararg. 1619 if (Name == "tBX_RET" || Name == "tBX_RET_vararg") 1620 return false; 1621 1622 // Ignore tADR, prefer tADDrPCi. 1623 if (Name == "tADR") 1624 return false; 1625 1626 // Delegate t2ADR disassembly to the more generic t2ADDri12/t2SUBri12 1627 // instructions. 1628 if (Name == "t2ADR") 1629 return false; 1630 1631 // Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr. 1632 // Ignore t2SUBrSPs, prefer the t2SUB[S]r[r|s]. 1633 // Ignore t2ADDrSPs, prefer the t2ADD[S]r[r|s]. 1634 if (Name == "tADDrSP" || Name == "tADDspr" || Name == "tPICADD" || 1635 Name == "t2SUBrSPs" || Name == "t2ADDrSPs") 1636 return false; 1637 1638 // FIXME: Use ldr.n to work around a Darwin assembler bug. 1639 // Introduce a workaround with tLDRpciDIS opcode. 1640 if (Name == "tLDRpci") 1641 return false; 1642 1643 // Ignore t2LDRDpci, prefer the generic t2LDRDi8, t2LDRD_PRE, t2LDRD_POST. 1644 if (Name == "t2LDRDpci") 1645 return false; 1646 1647 // Resolve conflicts: 1648 // 1649 // t2LDMIA_RET conflict with t2LDM (ditto) 1650 // tMOVCCi conflicts with tMOVi8 1651 // tMOVCCr conflicts with tMOVgpr2gpr 1652 // tLDRcp conflicts with tLDRspi 1653 // t2MOVCCi16 conflicts with tMOVi16 1654 if (Name == "t2LDMIA_RET" || 1655 Name == "tMOVCCi" || Name == "tMOVCCr" || 1656 Name == "tLDRcp" || 1657 Name == "t2MOVCCi16") 1658 return false; 1659 } 1660 1661 DEBUG({ 1662 // Dumps the instruction encoding format. 1663 switch (TargetName) { 1664 case TARGET_ARM: 1665 case TARGET_THUMB: 1666 errs() << Name << " " << stringForARMFormat((ARMFormat)Form); 1667 break; 1668 } 1669 1670 errs() << " "; 1671 1672 // Dumps the instruction encoding bits. 1673 dumpBits(errs(), Bits); 1674 1675 errs() << '\n'; 1676 1677 // Dumps the list of operand info. 1678 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 1679 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 1680 const std::string &OperandName = Info.Name; 1681 const Record &OperandDef = *Info.Rec; 1682 1683 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 1684 } 1685 }); 1686 1687 return true; 1688 } 1689 1690 void ARMDecoderEmitter::ARMDEBackend::populateInstructions() { 1691 getInstructionsByEnumValue(NumberedInstructions); 1692 1693 unsigned numUIDs = NumberedInstructions.size(); 1694 if (TargetName == TARGET_ARM) { 1695 for (unsigned uid = 0; uid < numUIDs; uid++) { 1696 // filter out intrinsics 1697 if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM")) 1698 continue; 1699 1700 if (populateInstruction(*NumberedInstructions[uid], TargetName)) 1701 Opcodes.push_back(uid); 1702 } 1703 1704 // Special handling for the ARM chip, which supports two modes of execution. 1705 // This branch handles the Thumb opcodes. 1706 for (unsigned uid = 0; uid < numUIDs; uid++) { 1707 // filter out intrinsics 1708 if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM") 1709 && !NumberedInstructions[uid]->TheDef->isSubClassOf("InstThumb")) 1710 continue; 1711 1712 if (populateInstruction(*NumberedInstructions[uid], TARGET_THUMB)) 1713 Opcodes2.push_back(uid); 1714 } 1715 1716 return; 1717 } 1718 1719 // For other targets. 1720 for (unsigned uid = 0; uid < numUIDs; uid++) { 1721 Record *R = NumberedInstructions[uid]->TheDef; 1722 if (R->getValueAsString("Namespace") == "TargetOpcode") 1723 continue; 1724 1725 if (populateInstruction(*NumberedInstructions[uid], TargetName)) 1726 Opcodes.push_back(uid); 1727 } 1728 } 1729 1730 // Emits disassembler code for instruction decoding. This delegates to the 1731 // FilterChooser instance to do the heavy lifting. 1732 void ARMDecoderEmitter::ARMDEBackend::emit(raw_ostream &o) { 1733 switch (TargetName) { 1734 case TARGET_ARM: 1735 Frontend.EmitSourceFileHeader("ARM/Thumb Decoders", o); 1736 break; 1737 default: 1738 assert(0 && "Unreachable code!"); 1739 } 1740 1741 o << "#include \"llvm/Support/DataTypes.h\"\n"; 1742 o << "#include <assert.h>\n"; 1743 o << '\n'; 1744 o << "namespace llvm {\n\n"; 1745 1746 ARMFilterChooser::setTargetName(TargetName); 1747 1748 switch (TargetName) { 1749 case TARGET_ARM: { 1750 // Emit common utility and ARM ISA decoder. 1751 FC = new ARMFilterChooser(NumberedInstructions, Opcodes); 1752 // Reset indentation level. 1753 unsigned Indentation = 0; 1754 FC->emitTop(o, Indentation); 1755 delete FC; 1756 1757 // Emit Thumb ISA decoder as well. 1758 ARMFilterChooser::setTargetName(TARGET_THUMB); 1759 FC = new ARMFilterChooser(NumberedInstructions, Opcodes2); 1760 // Reset indentation level. 1761 Indentation = 0; 1762 FC->emitBot(o, Indentation); 1763 break; 1764 } 1765 default: 1766 assert(0 && "Unreachable code!"); 1767 } 1768 1769 o << "\n} // End llvm namespace \n"; 1770 } 1771 1772 ///////////////////////// 1773 // Backend interface // 1774 ///////////////////////// 1775 1776 void ARMDecoderEmitter::initBackend() 1777 { 1778 Backend = new ARMDEBackend(*this, Records); 1779 } 1780 1781 void ARMDecoderEmitter::run(raw_ostream &o) 1782 { 1783 Backend->emit(o); 1784 } 1785 1786 void ARMDecoderEmitter::shutdownBackend() 1787 { 1788 delete Backend; 1789 Backend = NULL; 1790 } 1791