1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamReader class. This class can be used to 11 // read an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H 16 #define LLVM_BITCODE_BITSTREAMREADER_H 17 18 #include "llvm/Bitcode/BitCodes.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/Support/StreamableMemoryObject.h" 21 #include <climits> 22 #include <string> 23 #include <vector> 24 25 namespace llvm { 26 27 class Deserializer; 28 29 /// BitstreamReader - This class is used to read from an LLVM bitcode stream, 30 /// maintaining information that is global to decoding the entire file. While 31 /// a file is being read, multiple cursors can be independently advanced or 32 /// skipped around within the file. These are represented by the 33 /// BitstreamCursor class. 34 class BitstreamReader { 35 public: 36 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 37 /// These describe abbreviations that all blocks of the specified ID inherit. 38 struct BlockInfo { 39 unsigned BlockID; 40 std::vector<BitCodeAbbrev*> Abbrevs; 41 std::string Name; 42 43 std::vector<std::pair<unsigned, std::string> > RecordNames; 44 }; 45 private: 46 std::unique_ptr<StreamableMemoryObject> BitcodeBytes; 47 48 std::vector<BlockInfo> BlockInfoRecords; 49 50 /// IgnoreBlockInfoNames - This is set to true if we don't care about the 51 /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer 52 /// uses this. 53 bool IgnoreBlockInfoNames; 54 55 BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION; 56 void operator=(const BitstreamReader&) LLVM_DELETED_FUNCTION; 57 public: 58 BitstreamReader() : IgnoreBlockInfoNames(true) { 59 } 60 61 BitstreamReader(const unsigned char *Start, const unsigned char *End) { 62 IgnoreBlockInfoNames = true; 63 init(Start, End); 64 } 65 66 BitstreamReader(StreamableMemoryObject *bytes) { 67 BitcodeBytes.reset(bytes); 68 } 69 70 void init(const unsigned char *Start, const unsigned char *End) { 71 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 72 BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); 73 } 74 75 StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 76 77 ~BitstreamReader() { 78 // Free the BlockInfoRecords. 79 while (!BlockInfoRecords.empty()) { 80 BlockInfo &Info = BlockInfoRecords.back(); 81 // Free blockinfo abbrev info. 82 for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); 83 i != e; ++i) 84 Info.Abbrevs[i]->dropRef(); 85 BlockInfoRecords.pop_back(); 86 } 87 } 88 89 /// CollectBlockInfoNames - This is called by clients that want block/record 90 /// name information. 91 void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } 92 bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } 93 94 //===--------------------------------------------------------------------===// 95 // Block Manipulation 96 //===--------------------------------------------------------------------===// 97 98 /// hasBlockInfoRecords - Return true if we've already read and processed the 99 /// block info block for this Bitstream. We only process it for the first 100 /// cursor that walks over it. 101 bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } 102 103 /// getBlockInfo - If there is block info for the specified ID, return it, 104 /// otherwise return null. 105 const BlockInfo *getBlockInfo(unsigned BlockID) const { 106 // Common case, the most recent entry matches BlockID. 107 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 108 return &BlockInfoRecords.back(); 109 110 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 111 i != e; ++i) 112 if (BlockInfoRecords[i].BlockID == BlockID) 113 return &BlockInfoRecords[i]; 114 return nullptr; 115 } 116 117 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 118 if (const BlockInfo *BI = getBlockInfo(BlockID)) 119 return *const_cast<BlockInfo*>(BI); 120 121 // Otherwise, add a new record. 122 BlockInfoRecords.push_back(BlockInfo()); 123 BlockInfoRecords.back().BlockID = BlockID; 124 return BlockInfoRecords.back(); 125 } 126 }; 127 128 129 /// BitstreamEntry - When advancing through a bitstream cursor, each advance can 130 /// discover a few different kinds of entries: 131 /// Error - Malformed bitcode was found. 132 /// EndBlock - We've reached the end of the current block, (or the end of the 133 /// file, which is treated like a series of EndBlock records. 134 /// SubBlock - This is the start of a new subblock of a specific ID. 135 /// Record - This is a record with a specific AbbrevID. 136 /// 137 struct BitstreamEntry { 138 enum { 139 Error, 140 EndBlock, 141 SubBlock, 142 Record 143 } Kind; 144 145 unsigned ID; 146 147 static BitstreamEntry getError() { 148 BitstreamEntry E; E.Kind = Error; return E; 149 } 150 static BitstreamEntry getEndBlock() { 151 BitstreamEntry E; E.Kind = EndBlock; return E; 152 } 153 static BitstreamEntry getSubBlock(unsigned ID) { 154 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 155 } 156 static BitstreamEntry getRecord(unsigned AbbrevID) { 157 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 158 } 159 }; 160 161 /// BitstreamCursor - This represents a position within a bitcode file. There 162 /// may be multiple independent cursors reading within one bitstream, each 163 /// maintaining their own local state. 164 /// 165 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 166 /// be passed by value. 167 class BitstreamCursor { 168 friend class Deserializer; 169 BitstreamReader *BitStream; 170 size_t NextChar; 171 172 173 /// CurWord/word_t - This is the current data we have pulled from the stream 174 /// but have not returned to the client. This is specifically and 175 /// intentionally defined to follow the word size of the host machine for 176 /// efficiency. We use word_t in places that are aware of this to make it 177 /// perfectly explicit what is going on. 178 typedef uint32_t word_t; 179 word_t CurWord; 180 181 /// BitsInCurWord - This is the number of bits in CurWord that are valid. This 182 /// is always from [0...31/63] inclusive (depending on word size). 183 unsigned BitsInCurWord; 184 185 // CurCodeSize - This is the declared size of code values used for the current 186 // block, in bits. 187 unsigned CurCodeSize; 188 189 /// CurAbbrevs - Abbrevs installed at in this block. 190 std::vector<BitCodeAbbrev*> CurAbbrevs; 191 192 struct Block { 193 unsigned PrevCodeSize; 194 std::vector<BitCodeAbbrev*> PrevAbbrevs; 195 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 196 }; 197 198 /// BlockScope - This tracks the codesize of parent blocks. 199 SmallVector<Block, 8> BlockScope; 200 201 202 public: 203 BitstreamCursor() : BitStream(nullptr), NextChar(0) {} 204 BitstreamCursor(const BitstreamCursor &RHS) 205 : BitStream(nullptr), NextChar(0) { 206 operator=(RHS); 207 } 208 209 explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) { 210 NextChar = 0; 211 CurWord = 0; 212 BitsInCurWord = 0; 213 CurCodeSize = 2; 214 } 215 216 void init(BitstreamReader &R) { 217 freeState(); 218 219 BitStream = &R; 220 NextChar = 0; 221 CurWord = 0; 222 BitsInCurWord = 0; 223 CurCodeSize = 2; 224 } 225 226 ~BitstreamCursor() { 227 freeState(); 228 } 229 230 void operator=(const BitstreamCursor &RHS); 231 232 void freeState(); 233 234 bool isEndPos(size_t pos) { 235 return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos)); 236 } 237 238 bool canSkipToPos(size_t pos) const { 239 // pos can be skipped to if it is a valid address or one byte past the end. 240 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( 241 static_cast<uint64_t>(pos - 1)); 242 } 243 244 uint32_t getWord(size_t pos) { 245 uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; 246 BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf); 247 return *reinterpret_cast<support::ulittle32_t *>(buf); 248 } 249 250 bool AtEndOfStream() { 251 return BitsInCurWord == 0 && isEndPos(NextChar); 252 } 253 254 /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #. 255 unsigned getAbbrevIDWidth() const { return CurCodeSize; } 256 257 /// GetCurrentBitNo - Return the bit # of the bit we are reading. 258 uint64_t GetCurrentBitNo() const { 259 return NextChar*CHAR_BIT - BitsInCurWord; 260 } 261 262 BitstreamReader *getBitStreamReader() { 263 return BitStream; 264 } 265 const BitstreamReader *getBitStreamReader() const { 266 return BitStream; 267 } 268 269 /// Flags that modify the behavior of advance(). 270 enum { 271 /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does 272 /// not automatically pop the block scope when the end of a block is 273 /// reached. 274 AF_DontPopBlockAtEnd = 1, 275 276 /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are 277 /// returned just like normal records. 278 AF_DontAutoprocessAbbrevs = 2 279 }; 280 281 /// advance - Advance the current bitstream, returning the next entry in the 282 /// stream. 283 BitstreamEntry advance(unsigned Flags = 0) { 284 while (1) { 285 unsigned Code = ReadCode(); 286 if (Code == bitc::END_BLOCK) { 287 // Pop the end of the block unless Flags tells us not to. 288 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 289 return BitstreamEntry::getError(); 290 return BitstreamEntry::getEndBlock(); 291 } 292 293 if (Code == bitc::ENTER_SUBBLOCK) 294 return BitstreamEntry::getSubBlock(ReadSubBlockID()); 295 296 if (Code == bitc::DEFINE_ABBREV && 297 !(Flags & AF_DontAutoprocessAbbrevs)) { 298 // We read and accumulate abbrev's, the client can't do anything with 299 // them anyway. 300 ReadAbbrevRecord(); 301 continue; 302 } 303 304 return BitstreamEntry::getRecord(Code); 305 } 306 } 307 308 /// advanceSkippingSubblocks - This is a convenience function for clients that 309 /// don't expect any subblocks. This just skips over them automatically. 310 BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 311 while (1) { 312 // If we found a normal entry, return it. 313 BitstreamEntry Entry = advance(Flags); 314 if (Entry.Kind != BitstreamEntry::SubBlock) 315 return Entry; 316 317 // If we found a sub-block, just skip over it and check the next entry. 318 if (SkipBlock()) 319 return BitstreamEntry::getError(); 320 } 321 } 322 323 /// JumpToBit - Reset the stream to the specified bit number. 324 void JumpToBit(uint64_t BitNo) { 325 uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); 326 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 327 assert(canSkipToPos(ByteNo) && "Invalid location"); 328 329 // Move the cursor to the right word. 330 NextChar = ByteNo; 331 BitsInCurWord = 0; 332 CurWord = 0; 333 334 // Skip over any bits that are already consumed. 335 if (WordBitNo) { 336 if (sizeof(word_t) > 4) 337 Read64(WordBitNo); 338 else 339 Read(WordBitNo); 340 } 341 } 342 343 344 uint32_t Read(unsigned NumBits) { 345 assert(NumBits && NumBits <= 32 && 346 "Cannot return zero or more than 32 bits!"); 347 348 // If the field is fully contained by CurWord, return it quickly. 349 if (BitsInCurWord >= NumBits) { 350 uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); 351 CurWord >>= NumBits; 352 BitsInCurWord -= NumBits; 353 return R; 354 } 355 356 // If we run out of data, stop at the end of the stream. 357 if (isEndPos(NextChar)) { 358 CurWord = 0; 359 BitsInCurWord = 0; 360 return 0; 361 } 362 363 uint32_t R = uint32_t(CurWord); 364 365 // Read the next word from the stream. 366 uint8_t Array[sizeof(word_t)] = {0}; 367 368 BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array); 369 370 // Handle big-endian byte-swapping if necessary. 371 support::detail::packed_endian_specific_integral 372 <word_t, support::little, support::unaligned> EndianValue; 373 memcpy(&EndianValue, Array, sizeof(Array)); 374 375 CurWord = EndianValue; 376 377 NextChar += sizeof(word_t); 378 379 // Extract NumBits-BitsInCurWord from what we just read. 380 unsigned BitsLeft = NumBits-BitsInCurWord; 381 382 // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive. 383 R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft))) 384 << BitsInCurWord); 385 386 // BitsLeft bits have just been used up from CurWord. BitsLeft is in the 387 // range [1..32]/[1..64] so be careful how we shift. 388 if (BitsLeft != sizeof(word_t)*8) 389 CurWord >>= BitsLeft; 390 else 391 CurWord = 0; 392 BitsInCurWord = sizeof(word_t)*8-BitsLeft; 393 return R; 394 } 395 396 uint64_t Read64(unsigned NumBits) { 397 if (NumBits <= 32) return Read(NumBits); 398 399 uint64_t V = Read(32); 400 return V | (uint64_t)Read(NumBits-32) << 32; 401 } 402 403 uint32_t ReadVBR(unsigned NumBits) { 404 uint32_t Piece = Read(NumBits); 405 if ((Piece & (1U << (NumBits-1))) == 0) 406 return Piece; 407 408 uint32_t Result = 0; 409 unsigned NextBit = 0; 410 while (1) { 411 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 412 413 if ((Piece & (1U << (NumBits-1))) == 0) 414 return Result; 415 416 NextBit += NumBits-1; 417 Piece = Read(NumBits); 418 } 419 } 420 421 // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The 422 // chunk size of the VBR must still be <= 32 bits though. 423 uint64_t ReadVBR64(unsigned NumBits) { 424 uint32_t Piece = Read(NumBits); 425 if ((Piece & (1U << (NumBits-1))) == 0) 426 return uint64_t(Piece); 427 428 uint64_t Result = 0; 429 unsigned NextBit = 0; 430 while (1) { 431 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; 432 433 if ((Piece & (1U << (NumBits-1))) == 0) 434 return Result; 435 436 NextBit += NumBits-1; 437 Piece = Read(NumBits); 438 } 439 } 440 441 private: 442 void SkipToFourByteBoundary() { 443 // If word_t is 64-bits and if we've read less than 32 bits, just dump 444 // the bits we have up to the next 32-bit boundary. 445 if (sizeof(word_t) > 4 && 446 BitsInCurWord >= 32) { 447 CurWord >>= BitsInCurWord-32; 448 BitsInCurWord = 32; 449 return; 450 } 451 452 BitsInCurWord = 0; 453 CurWord = 0; 454 } 455 public: 456 457 unsigned ReadCode() { 458 return Read(CurCodeSize); 459 } 460 461 462 // Block header: 463 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 464 465 /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for 466 /// the block. 467 unsigned ReadSubBlockID() { 468 return ReadVBR(bitc::BlockIDWidth); 469 } 470 471 /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip 472 /// over the body of this block. If the block record is malformed, return 473 /// true. 474 bool SkipBlock() { 475 // Read and ignore the codelen value. Since we are skipping this block, we 476 // don't care what code widths are used inside of it. 477 ReadVBR(bitc::CodeLenWidth); 478 SkipToFourByteBoundary(); 479 unsigned NumFourBytes = Read(bitc::BlockSizeWidth); 480 481 // Check that the block wasn't partially defined, and that the offset isn't 482 // bogus. 483 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; 484 if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) 485 return true; 486 487 JumpToBit(SkipTo); 488 return false; 489 } 490 491 /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter 492 /// the block, and return true if the block has an error. 493 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 494 495 bool ReadBlockEnd() { 496 if (BlockScope.empty()) return true; 497 498 // Block tail: 499 // [END_BLOCK, <align4bytes>] 500 SkipToFourByteBoundary(); 501 502 popBlockScope(); 503 return false; 504 } 505 506 private: 507 508 void popBlockScope() { 509 CurCodeSize = BlockScope.back().PrevCodeSize; 510 511 // Delete abbrevs from popped scope. 512 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 513 i != e; ++i) 514 CurAbbrevs[i]->dropRef(); 515 516 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 517 BlockScope.pop_back(); 518 } 519 520 //===--------------------------------------------------------------------===// 521 // Record Processing 522 //===--------------------------------------------------------------------===// 523 524 private: 525 void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op, 526 SmallVectorImpl<uint64_t> &Vals); 527 void readAbbreviatedField(const BitCodeAbbrevOp &Op, 528 SmallVectorImpl<uint64_t> &Vals); 529 void skipAbbreviatedField(const BitCodeAbbrevOp &Op); 530 531 public: 532 533 /// getAbbrev - Return the abbreviation for the specified AbbrevId. 534 const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { 535 unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; 536 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 537 return CurAbbrevs[AbbrevNo]; 538 } 539 540 /// skipRecord - Read the current record and discard it. 541 void skipRecord(unsigned AbbrevID); 542 543 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, 544 StringRef *Blob = nullptr); 545 546 //===--------------------------------------------------------------------===// 547 // Abbrev Processing 548 //===--------------------------------------------------------------------===// 549 void ReadAbbrevRecord(); 550 551 bool ReadBlockInfoBlock(); 552 }; 553 554 } // End llvm namespace 555 556 #endif 557