1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamWriter class. This class can be used to 11 // write an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_BITCODE_BITSTREAMWRITER_H 16 #define LLVM_BITCODE_BITSTREAMWRITER_H 17 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Bitcode/BitCodes.h" 21 #include <vector> 22 23 namespace llvm { 24 25 class BitstreamWriter { 26 SmallVectorImpl<char> &Out; 27 28 /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. 29 unsigned CurBit; 30 31 /// CurValue - The current value. Only bits < CurBit are valid. 32 uint32_t CurValue; 33 34 /// CurCodeSize - This is the declared size of code values used for the 35 /// current block, in bits. 36 unsigned CurCodeSize; 37 38 /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently 39 /// selected BLOCK ID. 40 unsigned BlockInfoCurBID; 41 42 /// CurAbbrevs - Abbrevs installed at in this block. 43 std::vector<BitCodeAbbrev*> CurAbbrevs; 44 45 struct Block { 46 unsigned PrevCodeSize; 47 unsigned StartSizeWord; 48 std::vector<BitCodeAbbrev*> PrevAbbrevs; 49 Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} 50 }; 51 52 /// BlockScope - This tracks the current blocks that we have entered. 53 std::vector<Block> BlockScope; 54 55 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 56 /// These describe abbreviations that all blocks of the specified ID inherit. 57 struct BlockInfo { 58 unsigned BlockID; 59 std::vector<BitCodeAbbrev*> Abbrevs; 60 }; 61 std::vector<BlockInfo> BlockInfoRecords; 62 63 // BackpatchWord - Backpatch a 32-bit word in the output with the specified 64 // value. 65 void BackpatchWord(unsigned ByteNo, unsigned NewWord) { 66 Out[ByteNo++] = (unsigned char)(NewWord >> 0); 67 Out[ByteNo++] = (unsigned char)(NewWord >> 8); 68 Out[ByteNo++] = (unsigned char)(NewWord >> 16); 69 Out[ByteNo ] = (unsigned char)(NewWord >> 24); 70 } 71 72 void WriteByte(unsigned char Value) { 73 Out.push_back(Value); 74 } 75 76 void WriteWord(unsigned Value) { 77 unsigned char Bytes[4] = { 78 (unsigned char)(Value >> 0), 79 (unsigned char)(Value >> 8), 80 (unsigned char)(Value >> 16), 81 (unsigned char)(Value >> 24) }; 82 Out.append(&Bytes[0], &Bytes[4]); 83 } 84 85 unsigned GetBufferOffset() const { 86 return Out.size(); 87 } 88 89 unsigned GetWordIndex() const { 90 unsigned Offset = GetBufferOffset(); 91 assert((Offset & 3) == 0 && "Not 32-bit aligned"); 92 return Offset / 4; 93 } 94 95 public: 96 explicit BitstreamWriter(SmallVectorImpl<char> &O) 97 : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} 98 99 ~BitstreamWriter() { 100 assert(CurBit == 0 && "Unflushed data remaining"); 101 assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); 102 103 // Free the BlockInfoRecords. 104 while (!BlockInfoRecords.empty()) { 105 BlockInfo &Info = BlockInfoRecords.back(); 106 // Free blockinfo abbrev info. 107 for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); 108 i != e; ++i) 109 Info.Abbrevs[i]->dropRef(); 110 BlockInfoRecords.pop_back(); 111 } 112 } 113 114 /// \brief Retrieve the current position in the stream, in bits. 115 uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } 116 117 //===--------------------------------------------------------------------===// 118 // Basic Primitives for emitting bits to the stream. 119 //===--------------------------------------------------------------------===// 120 121 void Emit(uint32_t Val, unsigned NumBits) { 122 assert(NumBits && NumBits <= 32 && "Invalid value size!"); 123 assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); 124 CurValue |= Val << CurBit; 125 if (CurBit + NumBits < 32) { 126 CurBit += NumBits; 127 return; 128 } 129 130 // Add the current word. 131 WriteWord(CurValue); 132 133 if (CurBit) 134 CurValue = Val >> (32-CurBit); 135 else 136 CurValue = 0; 137 CurBit = (CurBit+NumBits) & 31; 138 } 139 140 void Emit64(uint64_t Val, unsigned NumBits) { 141 if (NumBits <= 32) 142 Emit((uint32_t)Val, NumBits); 143 else { 144 Emit((uint32_t)Val, 32); 145 Emit((uint32_t)(Val >> 32), NumBits-32); 146 } 147 } 148 149 void FlushToWord() { 150 if (CurBit) { 151 WriteWord(CurValue); 152 CurBit = 0; 153 CurValue = 0; 154 } 155 } 156 157 void EmitVBR(uint32_t Val, unsigned NumBits) { 158 assert(NumBits <= 32 && "Too many bits to emit!"); 159 uint32_t Threshold = 1U << (NumBits-1); 160 161 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 162 while (Val >= Threshold) { 163 Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); 164 Val >>= NumBits-1; 165 } 166 167 Emit(Val, NumBits); 168 } 169 170 void EmitVBR64(uint64_t Val, unsigned NumBits) { 171 assert(NumBits <= 32 && "Too many bits to emit!"); 172 if ((uint32_t)Val == Val) 173 return EmitVBR((uint32_t)Val, NumBits); 174 175 uint32_t Threshold = 1U << (NumBits-1); 176 177 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 178 while (Val >= Threshold) { 179 Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | 180 (1 << (NumBits-1)), NumBits); 181 Val >>= NumBits-1; 182 } 183 184 Emit((uint32_t)Val, NumBits); 185 } 186 187 /// EmitCode - Emit the specified code. 188 void EmitCode(unsigned Val) { 189 Emit(Val, CurCodeSize); 190 } 191 192 //===--------------------------------------------------------------------===// 193 // Block Manipulation 194 //===--------------------------------------------------------------------===// 195 196 /// getBlockInfo - If there is block info for the specified ID, return it, 197 /// otherwise return null. 198 BlockInfo *getBlockInfo(unsigned BlockID) { 199 // Common case, the most recent entry matches BlockID. 200 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 201 return &BlockInfoRecords.back(); 202 203 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 204 i != e; ++i) 205 if (BlockInfoRecords[i].BlockID == BlockID) 206 return &BlockInfoRecords[i]; 207 return nullptr; 208 } 209 210 void EnterSubblock(unsigned BlockID, unsigned CodeLen) { 211 // Block header: 212 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 213 EmitCode(bitc::ENTER_SUBBLOCK); 214 EmitVBR(BlockID, bitc::BlockIDWidth); 215 EmitVBR(CodeLen, bitc::CodeLenWidth); 216 FlushToWord(); 217 218 unsigned BlockSizeWordIndex = GetWordIndex(); 219 unsigned OldCodeSize = CurCodeSize; 220 221 // Emit a placeholder, which will be replaced when the block is popped. 222 Emit(0, bitc::BlockSizeWidth); 223 224 CurCodeSize = CodeLen; 225 226 // Push the outer block's abbrev set onto the stack, start out with an 227 // empty abbrev set. 228 BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex)); 229 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 230 231 // If there is a blockinfo for this BlockID, add all the predefined abbrevs 232 // to the abbrev list. 233 if (BlockInfo *Info = getBlockInfo(BlockID)) { 234 for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size()); 235 i != e; ++i) { 236 CurAbbrevs.push_back(Info->Abbrevs[i]); 237 Info->Abbrevs[i]->addRef(); 238 } 239 } 240 } 241 242 void ExitBlock() { 243 assert(!BlockScope.empty() && "Block scope imbalance!"); 244 245 // Delete all abbrevs. 246 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 247 i != e; ++i) 248 CurAbbrevs[i]->dropRef(); 249 250 const Block &B = BlockScope.back(); 251 252 // Block tail: 253 // [END_BLOCK, <align4bytes>] 254 EmitCode(bitc::END_BLOCK); 255 FlushToWord(); 256 257 // Compute the size of the block, in words, not counting the size field. 258 unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; 259 unsigned ByteNo = B.StartSizeWord*4; 260 261 // Update the block size field in the header of this sub-block. 262 BackpatchWord(ByteNo, SizeInWords); 263 264 // Restore the inner block's code size and abbrev table. 265 CurCodeSize = B.PrevCodeSize; 266 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 267 BlockScope.pop_back(); 268 } 269 270 //===--------------------------------------------------------------------===// 271 // Record Emission 272 //===--------------------------------------------------------------------===// 273 274 private: 275 /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev 276 /// record. This is a no-op, since the abbrev specifies the literal to use. 277 template<typename uintty> 278 void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) { 279 assert(Op.isLiteral() && "Not a literal"); 280 // If the abbrev specifies the literal value to use, don't emit 281 // anything. 282 assert(V == Op.getLiteralValue() && 283 "Invalid abbrev for record!"); 284 } 285 286 /// EmitAbbreviatedField - Emit a single scalar field value with the specified 287 /// encoding. 288 template<typename uintty> 289 void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { 290 assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); 291 292 // Encode the value as we are commanded. 293 switch (Op.getEncoding()) { 294 default: llvm_unreachable("Unknown encoding!"); 295 case BitCodeAbbrevOp::Fixed: 296 if (Op.getEncodingData()) 297 Emit((unsigned)V, (unsigned)Op.getEncodingData()); 298 break; 299 case BitCodeAbbrevOp::VBR: 300 if (Op.getEncodingData()) 301 EmitVBR64(V, (unsigned)Op.getEncodingData()); 302 break; 303 case BitCodeAbbrevOp::Char6: 304 Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6); 305 break; 306 } 307 } 308 309 /// EmitRecordWithAbbrevImpl - This is the core implementation of the record 310 /// emission code. If BlobData is non-null, then it specifies an array of 311 /// data that should be emitted as part of the Blob or Array operand that is 312 /// known to exist at the end of the record. 313 template<typename uintty> 314 void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 315 StringRef Blob) { 316 const char *BlobData = Blob.data(); 317 unsigned BlobLen = (unsigned) Blob.size(); 318 unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; 319 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 320 BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; 321 322 EmitCode(Abbrev); 323 324 unsigned RecordIdx = 0; 325 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 326 i != e; ++i) { 327 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 328 if (Op.isLiteral()) { 329 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 330 EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); 331 ++RecordIdx; 332 } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 333 // Array case. 334 assert(i+2 == e && "array op not second to last?"); 335 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 336 337 // If this record has blob data, emit it, otherwise we must have record 338 // entries to encode this way. 339 if (BlobData) { 340 assert(RecordIdx == Vals.size() && 341 "Blob data and record entries specified for array!"); 342 // Emit a vbr6 to indicate the number of elements present. 343 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 344 345 // Emit each field. 346 for (unsigned i = 0; i != BlobLen; ++i) 347 EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); 348 349 // Know that blob data is consumed for assertion below. 350 BlobData = nullptr; 351 } else { 352 // Emit a vbr6 to indicate the number of elements present. 353 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 354 355 // Emit each field. 356 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) 357 EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); 358 } 359 } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { 360 // If this record has blob data, emit it, otherwise we must have record 361 // entries to encode this way. 362 363 // Emit a vbr6 to indicate the number of elements present. 364 if (BlobData) { 365 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 366 assert(RecordIdx == Vals.size() && 367 "Blob data and record entries specified for blob operand!"); 368 } else { 369 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 370 } 371 372 // Flush to a 32-bit alignment boundary. 373 FlushToWord(); 374 375 // Emit each field as a literal byte. 376 if (BlobData) { 377 for (unsigned i = 0; i != BlobLen; ++i) 378 WriteByte((unsigned char)BlobData[i]); 379 380 // Know that blob data is consumed for assertion below. 381 BlobData = nullptr; 382 } else { 383 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) { 384 assert(isUInt<8>(Vals[RecordIdx]) && 385 "Value too large to emit as blob"); 386 WriteByte((unsigned char)Vals[RecordIdx]); 387 } 388 } 389 390 // Align end to 32-bits. 391 while (GetBufferOffset() & 3) 392 WriteByte(0); 393 } else { // Single scalar field. 394 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 395 EmitAbbreviatedField(Op, Vals[RecordIdx]); 396 ++RecordIdx; 397 } 398 } 399 assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); 400 assert(BlobData == nullptr && 401 "Blob data specified for record that doesn't use it!"); 402 } 403 404 public: 405 406 /// EmitRecord - Emit the specified record to the stream, using an abbrev if 407 /// we have one to compress the output. 408 template<typename uintty> 409 void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals, 410 unsigned Abbrev = 0) { 411 if (!Abbrev) { 412 // If we don't have an abbrev to use, emit this in its fully unabbreviated 413 // form. 414 EmitCode(bitc::UNABBREV_RECORD); 415 EmitVBR(Code, 6); 416 EmitVBR(static_cast<uint32_t>(Vals.size()), 6); 417 for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i) 418 EmitVBR64(Vals[i], 6); 419 return; 420 } 421 422 // Insert the code into Vals to treat it uniformly. 423 Vals.insert(Vals.begin(), Code); 424 425 EmitRecordWithAbbrev(Abbrev, Vals); 426 } 427 428 /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. 429 /// Unlike EmitRecord, the code for the record should be included in Vals as 430 /// the first entry. 431 template<typename uintty> 432 void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) { 433 EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); 434 } 435 436 /// EmitRecordWithBlob - Emit the specified record to the stream, using an 437 /// abbrev that includes a blob at the end. The blob data to emit is 438 /// specified by the pointer and length specified at the end. In contrast to 439 /// EmitRecord, this routine expects that the first entry in Vals is the code 440 /// of the record. 441 template<typename uintty> 442 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 443 StringRef Blob) { 444 EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); 445 } 446 template<typename uintty> 447 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 448 const char *BlobData, unsigned BlobLen) { 449 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); 450 } 451 452 /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records 453 /// that end with an array. 454 template<typename uintty> 455 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 456 StringRef Array) { 457 EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); 458 } 459 template<typename uintty> 460 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 461 const char *ArrayData, unsigned ArrayLen) { 462 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, 463 ArrayLen)); 464 } 465 466 //===--------------------------------------------------------------------===// 467 // Abbrev Emission 468 //===--------------------------------------------------------------------===// 469 470 private: 471 // Emit the abbreviation as a DEFINE_ABBREV record. 472 void EncodeAbbrev(BitCodeAbbrev *Abbv) { 473 EmitCode(bitc::DEFINE_ABBREV); 474 EmitVBR(Abbv->getNumOperandInfos(), 5); 475 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 476 i != e; ++i) { 477 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 478 Emit(Op.isLiteral(), 1); 479 if (Op.isLiteral()) { 480 EmitVBR64(Op.getLiteralValue(), 8); 481 } else { 482 Emit(Op.getEncoding(), 3); 483 if (Op.hasEncodingData()) 484 EmitVBR64(Op.getEncodingData(), 5); 485 } 486 } 487 } 488 public: 489 490 /// EmitAbbrev - This emits an abbreviation to the stream. Note that this 491 /// method takes ownership of the specified abbrev. 492 unsigned EmitAbbrev(BitCodeAbbrev *Abbv) { 493 // Emit the abbreviation as a record. 494 EncodeAbbrev(Abbv); 495 CurAbbrevs.push_back(Abbv); 496 return static_cast<unsigned>(CurAbbrevs.size())-1 + 497 bitc::FIRST_APPLICATION_ABBREV; 498 } 499 500 //===--------------------------------------------------------------------===// 501 // BlockInfo Block Emission 502 //===--------------------------------------------------------------------===// 503 504 /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. 505 void EnterBlockInfoBlock(unsigned CodeWidth) { 506 EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); 507 BlockInfoCurBID = ~0U; 508 } 509 private: 510 /// SwitchToBlockID - If we aren't already talking about the specified block 511 /// ID, emit a BLOCKINFO_CODE_SETBID record. 512 void SwitchToBlockID(unsigned BlockID) { 513 if (BlockInfoCurBID == BlockID) return; 514 SmallVector<unsigned, 2> V; 515 V.push_back(BlockID); 516 EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V); 517 BlockInfoCurBID = BlockID; 518 } 519 520 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 521 if (BlockInfo *BI = getBlockInfo(BlockID)) 522 return *BI; 523 524 // Otherwise, add a new record. 525 BlockInfoRecords.push_back(BlockInfo()); 526 BlockInfoRecords.back().BlockID = BlockID; 527 return BlockInfoRecords.back(); 528 } 529 530 public: 531 532 /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified 533 /// BlockID. 534 unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) { 535 SwitchToBlockID(BlockID); 536 EncodeAbbrev(Abbv); 537 538 // Add the abbrev to the specified block record. 539 BlockInfo &Info = getOrCreateBlockInfo(BlockID); 540 Info.Abbrevs.push_back(Abbv); 541 542 return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; 543 } 544 }; 545 546 547 } // End llvm namespace 548 549 #endif 550