1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamReader class. This class can be used to 11 // read an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H 16 #define LLVM_BITCODE_BITSTREAMREADER_H 17 18 #include "llvm/Bitcode/BitCodes.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/Support/StreamingMemoryObject.h" 21 #include <climits> 22 #include <string> 23 #include <vector> 24 25 namespace llvm { 26 27 /// This class is used to read from an LLVM bitcode stream, maintaining 28 /// information that is global to decoding the entire file. While a file is 29 /// being read, multiple cursors can be independently advanced or skipped around 30 /// within the file. These are represented by the BitstreamCursor class. 31 class BitstreamReader { 32 public: 33 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These 34 /// describe abbreviations that all blocks of the specified ID inherit. 35 struct BlockInfo { 36 unsigned BlockID; 37 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; 38 std::string Name; 39 40 std::vector<std::pair<unsigned, std::string> > RecordNames; 41 }; 42 private: 43 std::unique_ptr<MemoryObject> BitcodeBytes; 44 45 std::vector<BlockInfo> BlockInfoRecords; 46 47 /// This is set to true if we don't care about the block/record name 48 /// information in the BlockInfo block. Only llvm-bcanalyzer uses this. 49 bool IgnoreBlockInfoNames; 50 51 BitstreamReader(const BitstreamReader&) = delete; 52 void operator=(const BitstreamReader&) = delete; 53 public: 54 BitstreamReader() : IgnoreBlockInfoNames(true) { 55 } 56 57 BitstreamReader(const unsigned char *Start, const unsigned char *End) 58 : IgnoreBlockInfoNames(true) { 59 init(Start, End); 60 } 61 62 BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes) 63 : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {} 64 65 BitstreamReader(BitstreamReader &&Other) { 66 *this = std::move(Other); 67 } 68 69 BitstreamReader &operator=(BitstreamReader &&Other) { 70 BitcodeBytes = std::move(Other.BitcodeBytes); 71 // Explicitly swap block info, so that nothing gets destroyed twice. 72 std::swap(BlockInfoRecords, Other.BlockInfoRecords); 73 IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames; 74 return *this; 75 } 76 77 void init(const unsigned char *Start, const unsigned char *End) { 78 assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); 79 BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); 80 } 81 82 MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } 83 84 /// This is called by clients that want block/record name information. 85 void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } 86 bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } 87 88 //===--------------------------------------------------------------------===// 89 // Block Manipulation 90 //===--------------------------------------------------------------------===// 91 92 /// Return true if we've already read and processed the block info block for 93 /// this Bitstream. We only process it for the first cursor that walks over 94 /// it. 95 bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } 96 97 /// If there is block info for the specified ID, return it, otherwise return 98 /// null. 99 const BlockInfo *getBlockInfo(unsigned BlockID) const { 100 // Common case, the most recent entry matches BlockID. 101 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 102 return &BlockInfoRecords.back(); 103 104 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 105 i != e; ++i) 106 if (BlockInfoRecords[i].BlockID == BlockID) 107 return &BlockInfoRecords[i]; 108 return nullptr; 109 } 110 111 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 112 if (const BlockInfo *BI = getBlockInfo(BlockID)) 113 return *const_cast<BlockInfo*>(BI); 114 115 // Otherwise, add a new record. 116 BlockInfoRecords.emplace_back(); 117 BlockInfoRecords.back().BlockID = BlockID; 118 return BlockInfoRecords.back(); 119 } 120 121 /// Takes block info from the other bitstream reader. 122 /// 123 /// This is a "take" operation because BlockInfo records are non-trivial, and 124 /// indeed rather expensive. 125 void takeBlockInfo(BitstreamReader &&Other) { 126 assert(!hasBlockInfoRecords()); 127 BlockInfoRecords = std::move(Other.BlockInfoRecords); 128 } 129 }; 130 131 /// When advancing through a bitstream cursor, each advance can discover a few 132 /// different kinds of entries: 133 struct BitstreamEntry { 134 enum { 135 Error, // Malformed bitcode was found. 136 EndBlock, // We've reached the end of the current block, (or the end of the 137 // file, which is treated like a series of EndBlock records. 138 SubBlock, // This is the start of a new subblock of a specific ID. 139 Record // This is a record with a specific AbbrevID. 140 } Kind; 141 142 unsigned ID; 143 144 static BitstreamEntry getError() { 145 BitstreamEntry E; E.Kind = Error; return E; 146 } 147 static BitstreamEntry getEndBlock() { 148 BitstreamEntry E; E.Kind = EndBlock; return E; 149 } 150 static BitstreamEntry getSubBlock(unsigned ID) { 151 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 152 } 153 static BitstreamEntry getRecord(unsigned AbbrevID) { 154 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 155 } 156 }; 157 158 /// This represents a position within a bitcode file. There may be multiple 159 /// independent cursors reading within one bitstream, each maintaining their own 160 /// local state. 161 /// 162 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 163 /// be passed by value. 164 class BitstreamCursor { 165 BitstreamReader *BitStream; 166 size_t NextChar; 167 168 // The size of the bicode. 0 if we don't know it yet. 169 size_t Size; 170 171 /// This is the current data we have pulled from the stream but have not 172 /// returned to the client. This is specifically and intentionally defined to 173 /// follow the word size of the host machine for efficiency. We use word_t in 174 /// places that are aware of this to make it perfectly explicit what is going 175 /// on. 176 typedef size_t word_t; 177 word_t CurWord; 178 179 /// This is the number of bits in CurWord that are valid. This is always from 180 /// [0...bits_of(size_t)-1] inclusive. 181 unsigned BitsInCurWord; 182 183 // This is the declared size of code values used for the current block, in 184 // bits. 185 unsigned CurCodeSize; 186 187 /// Abbrevs installed at in this block. 188 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs; 189 190 struct Block { 191 unsigned PrevCodeSize; 192 std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; 193 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 194 }; 195 196 /// This tracks the codesize of parent blocks. 197 SmallVector<Block, 8> BlockScope; 198 199 200 public: 201 static const size_t MaxChunkSize = sizeof(word_t) * 8; 202 203 BitstreamCursor() { init(nullptr); } 204 205 explicit BitstreamCursor(BitstreamReader &R) { init(&R); } 206 207 void init(BitstreamReader *R) { 208 freeState(); 209 210 BitStream = R; 211 NextChar = 0; 212 Size = 0; 213 BitsInCurWord = 0; 214 CurCodeSize = 2; 215 } 216 217 void freeState(); 218 219 bool canSkipToPos(size_t pos) const { 220 // pos can be skipped to if it is a valid address or one byte past the end. 221 return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( 222 static_cast<uint64_t>(pos - 1)); 223 } 224 225 bool AtEndOfStream() { 226 if (BitsInCurWord != 0) 227 return false; 228 if (Size != 0) 229 return Size == NextChar; 230 fillCurWord(); 231 return BitsInCurWord == 0; 232 } 233 234 /// Return the number of bits used to encode an abbrev #. 235 unsigned getAbbrevIDWidth() const { return CurCodeSize; } 236 237 /// Return the bit # of the bit we are reading. 238 uint64_t GetCurrentBitNo() const { 239 return NextChar*CHAR_BIT - BitsInCurWord; 240 } 241 242 BitstreamReader *getBitStreamReader() { 243 return BitStream; 244 } 245 const BitstreamReader *getBitStreamReader() const { 246 return BitStream; 247 } 248 249 /// Flags that modify the behavior of advance(). 250 enum { 251 /// If this flag is used, the advance() method does not automatically pop 252 /// the block scope when the end of a block is reached. 253 AF_DontPopBlockAtEnd = 1, 254 255 /// If this flag is used, abbrev entries are returned just like normal 256 /// records. 257 AF_DontAutoprocessAbbrevs = 2 258 }; 259 260 /// Advance the current bitstream, returning the next entry in the stream. 261 BitstreamEntry advance(unsigned Flags = 0) { 262 while (1) { 263 unsigned Code = ReadCode(); 264 if (Code == bitc::END_BLOCK) { 265 // Pop the end of the block unless Flags tells us not to. 266 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 267 return BitstreamEntry::getError(); 268 return BitstreamEntry::getEndBlock(); 269 } 270 271 if (Code == bitc::ENTER_SUBBLOCK) 272 return BitstreamEntry::getSubBlock(ReadSubBlockID()); 273 274 if (Code == bitc::DEFINE_ABBREV && 275 !(Flags & AF_DontAutoprocessAbbrevs)) { 276 // We read and accumulate abbrev's, the client can't do anything with 277 // them anyway. 278 ReadAbbrevRecord(); 279 continue; 280 } 281 282 return BitstreamEntry::getRecord(Code); 283 } 284 } 285 286 /// This is a convenience function for clients that don't expect any 287 /// subblocks. This just skips over them automatically. 288 BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { 289 while (1) { 290 // If we found a normal entry, return it. 291 BitstreamEntry Entry = advance(Flags); 292 if (Entry.Kind != BitstreamEntry::SubBlock) 293 return Entry; 294 295 // If we found a sub-block, just skip over it and check the next entry. 296 if (SkipBlock()) 297 return BitstreamEntry::getError(); 298 } 299 } 300 301 /// Reset the stream to the specified bit number. 302 void JumpToBit(uint64_t BitNo) { 303 size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); 304 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 305 assert(canSkipToPos(ByteNo) && "Invalid location"); 306 307 // Move the cursor to the right word. 308 NextChar = ByteNo; 309 BitsInCurWord = 0; 310 311 // Skip over any bits that are already consumed. 312 if (WordBitNo) 313 Read(WordBitNo); 314 } 315 316 void fillCurWord() { 317 if (Size != 0 && NextChar >= Size) 318 report_fatal_error("Unexpected end of file"); 319 320 // Read the next word from the stream. 321 uint8_t Array[sizeof(word_t)] = {0}; 322 323 uint64_t BytesRead = 324 BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); 325 326 // If we run out of data, stop at the end of the stream. 327 if (BytesRead == 0) { 328 CurWord = 0; 329 BitsInCurWord = 0; 330 Size = NextChar; 331 return; 332 } 333 334 CurWord = 335 support::endian::read<word_t, support::little, support::unaligned>( 336 Array); 337 NextChar += BytesRead; 338 BitsInCurWord = BytesRead * 8; 339 } 340 341 word_t Read(unsigned NumBits) { 342 static const unsigned BitsInWord = MaxChunkSize; 343 344 assert(NumBits && NumBits <= BitsInWord && 345 "Cannot return zero or more than BitsInWord bits!"); 346 347 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 348 349 // If the field is fully contained by CurWord, return it quickly. 350 if (BitsInCurWord >= NumBits) { 351 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 352 353 // Use a mask to avoid undefined behavior. 354 CurWord >>= (NumBits & Mask); 355 356 BitsInCurWord -= NumBits; 357 return R; 358 } 359 360 word_t R = BitsInCurWord ? CurWord : 0; 361 unsigned BitsLeft = NumBits - BitsInCurWord; 362 363 fillCurWord(); 364 365 // If we run out of data, stop at the end of the stream. 366 if (BitsLeft > BitsInCurWord) 367 return 0; 368 369 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 370 371 // Use a mask to avoid undefined behavior. 372 CurWord >>= (BitsLeft & Mask); 373 374 BitsInCurWord -= BitsLeft; 375 376 R |= R2 << (NumBits - BitsLeft); 377 378 return R; 379 } 380 381 uint32_t ReadVBR(unsigned NumBits) { 382 uint32_t Piece = Read(NumBits); 383 if ((Piece & (1U << (NumBits-1))) == 0) 384 return Piece; 385 386 uint32_t Result = 0; 387 unsigned NextBit = 0; 388 while (1) { 389 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; 390 391 if ((Piece & (1U << (NumBits-1))) == 0) 392 return Result; 393 394 NextBit += NumBits-1; 395 Piece = Read(NumBits); 396 } 397 } 398 399 // Read a VBR that may have a value up to 64-bits in size. The chunk size of 400 // the VBR must still be <= 32 bits though. 401 uint64_t ReadVBR64(unsigned NumBits) { 402 uint32_t Piece = Read(NumBits); 403 if ((Piece & (1U << (NumBits-1))) == 0) 404 return uint64_t(Piece); 405 406 uint64_t Result = 0; 407 unsigned NextBit = 0; 408 while (1) { 409 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; 410 411 if ((Piece & (1U << (NumBits-1))) == 0) 412 return Result; 413 414 NextBit += NumBits-1; 415 Piece = Read(NumBits); 416 } 417 } 418 419 private: 420 void SkipToFourByteBoundary() { 421 // If word_t is 64-bits and if we've read less than 32 bits, just dump 422 // the bits we have up to the next 32-bit boundary. 423 if (sizeof(word_t) > 4 && 424 BitsInCurWord >= 32) { 425 CurWord >>= BitsInCurWord-32; 426 BitsInCurWord = 32; 427 return; 428 } 429 430 BitsInCurWord = 0; 431 } 432 public: 433 434 unsigned ReadCode() { 435 return Read(CurCodeSize); 436 } 437 438 439 // Block header: 440 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 441 442 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. 443 unsigned ReadSubBlockID() { 444 return ReadVBR(bitc::BlockIDWidth); 445 } 446 447 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 448 /// of this block. If the block record is malformed, return true. 449 bool SkipBlock() { 450 // Read and ignore the codelen value. Since we are skipping this block, we 451 // don't care what code widths are used inside of it. 452 ReadVBR(bitc::CodeLenWidth); 453 SkipToFourByteBoundary(); 454 unsigned NumFourBytes = Read(bitc::BlockSizeWidth); 455 456 // Check that the block wasn't partially defined, and that the offset isn't 457 // bogus. 458 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; 459 if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) 460 return true; 461 462 JumpToBit(SkipTo); 463 return false; 464 } 465 466 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true 467 /// if the block has an error. 468 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 469 470 bool ReadBlockEnd() { 471 if (BlockScope.empty()) return true; 472 473 // Block tail: 474 // [END_BLOCK, <align4bytes>] 475 SkipToFourByteBoundary(); 476 477 popBlockScope(); 478 return false; 479 } 480 481 private: 482 483 void popBlockScope() { 484 CurCodeSize = BlockScope.back().PrevCodeSize; 485 486 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); 487 BlockScope.pop_back(); 488 } 489 490 //===--------------------------------------------------------------------===// 491 // Record Processing 492 //===--------------------------------------------------------------------===// 493 494 public: 495 /// Return the abbreviation for the specified AbbrevId. 496 const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { 497 unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; 498 if (AbbrevNo >= CurAbbrevs.size()) 499 report_fatal_error("Invalid abbrev number"); 500 return CurAbbrevs[AbbrevNo].get(); 501 } 502 503 /// Read the current record and discard it. 504 void skipRecord(unsigned AbbrevID); 505 506 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, 507 StringRef *Blob = nullptr); 508 509 //===--------------------------------------------------------------------===// 510 // Abbrev Processing 511 //===--------------------------------------------------------------------===// 512 void ReadAbbrevRecord(); 513 514 bool ReadBlockInfoBlock(); 515 }; 516 517 } // End llvm namespace 518 519 #endif 520