1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This header defines the BitstreamWriter class. This class can be used to 11 // write an arbitrary bitstream, regardless of its contents. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef BITSTREAM_WRITER_H 16 #define BITSTREAM_WRITER_H 17 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Bitcode/BitCodes.h" 20 #include <vector> 21 22 namespace llvm { 23 24 class BitstreamWriter { 25 std::vector<unsigned char> &Out; 26 27 /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. 28 unsigned CurBit; 29 30 /// CurValue - The current value. Only bits < CurBit are valid. 31 uint32_t CurValue; 32 33 /// CurCodeSize - This is the declared size of code values used for the 34 /// current block, in bits. 35 unsigned CurCodeSize; 36 37 /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently 38 /// selected BLOCK ID. 39 unsigned BlockInfoCurBID; 40 41 /// CurAbbrevs - Abbrevs installed at in this block. 42 std::vector<BitCodeAbbrev*> CurAbbrevs; 43 44 struct Block { 45 unsigned PrevCodeSize; 46 unsigned StartSizeWord; 47 std::vector<BitCodeAbbrev*> PrevAbbrevs; 48 Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} 49 }; 50 51 /// BlockScope - This tracks the current blocks that we have entered. 52 std::vector<Block> BlockScope; 53 54 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. 55 /// These describe abbreviations that all blocks of the specified ID inherit. 56 struct BlockInfo { 57 unsigned BlockID; 58 std::vector<BitCodeAbbrev*> Abbrevs; 59 }; 60 std::vector<BlockInfo> BlockInfoRecords; 61 62 public: 63 explicit BitstreamWriter(std::vector<unsigned char> &O) 64 : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} 65 66 ~BitstreamWriter() { 67 assert(CurBit == 0 && "Unflused data remaining"); 68 assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); 69 70 // Free the BlockInfoRecords. 71 while (!BlockInfoRecords.empty()) { 72 BlockInfo &Info = BlockInfoRecords.back(); 73 // Free blockinfo abbrev info. 74 for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); 75 i != e; ++i) 76 Info.Abbrevs[i]->dropRef(); 77 BlockInfoRecords.pop_back(); 78 } 79 } 80 81 std::vector<unsigned char> &getBuffer() { return Out; } 82 83 /// \brief Retrieve the current position in the stream, in bits. 84 uint64_t GetCurrentBitNo() const { return Out.size() * 8 + CurBit; } 85 86 //===--------------------------------------------------------------------===// 87 // Basic Primitives for emitting bits to the stream. 88 //===--------------------------------------------------------------------===// 89 90 void Emit(uint32_t Val, unsigned NumBits) { 91 assert(NumBits && NumBits <= 32 && "Invalid value size!"); 92 assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); 93 CurValue |= Val << CurBit; 94 if (CurBit + NumBits < 32) { 95 CurBit += NumBits; 96 return; 97 } 98 99 // Add the current word. 100 unsigned V = CurValue; 101 Out.push_back((unsigned char)(V >> 0)); 102 Out.push_back((unsigned char)(V >> 8)); 103 Out.push_back((unsigned char)(V >> 16)); 104 Out.push_back((unsigned char)(V >> 24)); 105 106 if (CurBit) 107 CurValue = Val >> (32-CurBit); 108 else 109 CurValue = 0; 110 CurBit = (CurBit+NumBits) & 31; 111 } 112 113 void Emit64(uint64_t Val, unsigned NumBits) { 114 if (NumBits <= 32) 115 Emit((uint32_t)Val, NumBits); 116 else { 117 Emit((uint32_t)Val, 32); 118 Emit((uint32_t)(Val >> 32), NumBits-32); 119 } 120 } 121 122 void FlushToWord() { 123 if (CurBit) { 124 unsigned V = CurValue; 125 Out.push_back((unsigned char)(V >> 0)); 126 Out.push_back((unsigned char)(V >> 8)); 127 Out.push_back((unsigned char)(V >> 16)); 128 Out.push_back((unsigned char)(V >> 24)); 129 CurBit = 0; 130 CurValue = 0; 131 } 132 } 133 134 void EmitVBR(uint32_t Val, unsigned NumBits) { 135 uint32_t Threshold = 1U << (NumBits-1); 136 137 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 138 while (Val >= Threshold) { 139 Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); 140 Val >>= NumBits-1; 141 } 142 143 Emit(Val, NumBits); 144 } 145 146 void EmitVBR64(uint64_t Val, unsigned NumBits) { 147 if ((uint32_t)Val == Val) 148 return EmitVBR((uint32_t)Val, NumBits); 149 150 uint64_t Threshold = 1U << (NumBits-1); 151 152 // Emit the bits with VBR encoding, NumBits-1 bits at a time. 153 while (Val >= Threshold) { 154 Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | 155 (1 << (NumBits-1)), NumBits); 156 Val >>= NumBits-1; 157 } 158 159 Emit((uint32_t)Val, NumBits); 160 } 161 162 /// EmitCode - Emit the specified code. 163 void EmitCode(unsigned Val) { 164 Emit(Val, CurCodeSize); 165 } 166 167 // BackpatchWord - Backpatch a 32-bit word in the output with the specified 168 // value. 169 void BackpatchWord(unsigned ByteNo, unsigned NewWord) { 170 Out[ByteNo++] = (unsigned char)(NewWord >> 0); 171 Out[ByteNo++] = (unsigned char)(NewWord >> 8); 172 Out[ByteNo++] = (unsigned char)(NewWord >> 16); 173 Out[ByteNo ] = (unsigned char)(NewWord >> 24); 174 } 175 176 //===--------------------------------------------------------------------===// 177 // Block Manipulation 178 //===--------------------------------------------------------------------===// 179 180 /// getBlockInfo - If there is block info for the specified ID, return it, 181 /// otherwise return null. 182 BlockInfo *getBlockInfo(unsigned BlockID) { 183 // Common case, the most recent entry matches BlockID. 184 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 185 return &BlockInfoRecords.back(); 186 187 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); 188 i != e; ++i) 189 if (BlockInfoRecords[i].BlockID == BlockID) 190 return &BlockInfoRecords[i]; 191 return 0; 192 } 193 194 void EnterSubblock(unsigned BlockID, unsigned CodeLen) { 195 // Block header: 196 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 197 EmitCode(bitc::ENTER_SUBBLOCK); 198 EmitVBR(BlockID, bitc::BlockIDWidth); 199 EmitVBR(CodeLen, bitc::CodeLenWidth); 200 FlushToWord(); 201 202 unsigned BlockSizeWordLoc = static_cast<unsigned>(Out.size()); 203 unsigned OldCodeSize = CurCodeSize; 204 205 // Emit a placeholder, which will be replaced when the block is popped. 206 Emit(0, bitc::BlockSizeWidth); 207 208 CurCodeSize = CodeLen; 209 210 // Push the outer block's abbrev set onto the stack, start out with an 211 // empty abbrev set. 212 BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4)); 213 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 214 215 // If there is a blockinfo for this BlockID, add all the predefined abbrevs 216 // to the abbrev list. 217 if (BlockInfo *Info = getBlockInfo(BlockID)) { 218 for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size()); 219 i != e; ++i) { 220 CurAbbrevs.push_back(Info->Abbrevs[i]); 221 Info->Abbrevs[i]->addRef(); 222 } 223 } 224 } 225 226 void ExitBlock() { 227 assert(!BlockScope.empty() && "Block scope imbalance!"); 228 229 // Delete all abbrevs. 230 for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); 231 i != e; ++i) 232 CurAbbrevs[i]->dropRef(); 233 234 const Block &B = BlockScope.back(); 235 236 // Block tail: 237 // [END_BLOCK, <align4bytes>] 238 EmitCode(bitc::END_BLOCK); 239 FlushToWord(); 240 241 // Compute the size of the block, in words, not counting the size field. 242 unsigned SizeInWords= static_cast<unsigned>(Out.size())/4-B.StartSizeWord-1; 243 unsigned ByteNo = B.StartSizeWord*4; 244 245 // Update the block size field in the header of this sub-block. 246 BackpatchWord(ByteNo, SizeInWords); 247 248 // Restore the inner block's code size and abbrev table. 249 CurCodeSize = B.PrevCodeSize; 250 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 251 BlockScope.pop_back(); 252 } 253 254 //===--------------------------------------------------------------------===// 255 // Record Emission 256 //===--------------------------------------------------------------------===// 257 258 private: 259 /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev 260 /// record. This is a no-op, since the abbrev specifies the literal to use. 261 template<typename uintty> 262 void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) { 263 assert(Op.isLiteral() && "Not a literal"); 264 // If the abbrev specifies the literal value to use, don't emit 265 // anything. 266 assert(V == Op.getLiteralValue() && 267 "Invalid abbrev for record!"); 268 } 269 270 /// EmitAbbreviatedField - Emit a single scalar field value with the specified 271 /// encoding. 272 template<typename uintty> 273 void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { 274 assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); 275 276 // Encode the value as we are commanded. 277 switch (Op.getEncoding()) { 278 default: assert(0 && "Unknown encoding!"); 279 case BitCodeAbbrevOp::Fixed: 280 if (Op.getEncodingData()) 281 Emit((unsigned)V, (unsigned)Op.getEncodingData()); 282 break; 283 case BitCodeAbbrevOp::VBR: 284 if (Op.getEncodingData()) 285 EmitVBR64(V, (unsigned)Op.getEncodingData()); 286 break; 287 case BitCodeAbbrevOp::Char6: 288 Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6); 289 break; 290 } 291 } 292 293 /// EmitRecordWithAbbrevImpl - This is the core implementation of the record 294 /// emission code. If BlobData is non-null, then it specifies an array of 295 /// data that should be emitted as part of the Blob or Array operand that is 296 /// known to exist at the end of the record. 297 template<typename uintty> 298 void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 299 StringRef Blob) { 300 const char *BlobData = Blob.data(); 301 unsigned BlobLen = (unsigned) Blob.size(); 302 unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; 303 assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); 304 BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; 305 306 EmitCode(Abbrev); 307 308 unsigned RecordIdx = 0; 309 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 310 i != e; ++i) { 311 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 312 if (Op.isLiteral()) { 313 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 314 EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); 315 ++RecordIdx; 316 } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 317 // Array case. 318 assert(i+2 == e && "array op not second to last?"); 319 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 320 321 // If this record has blob data, emit it, otherwise we must have record 322 // entries to encode this way. 323 if (BlobData) { 324 assert(RecordIdx == Vals.size() && 325 "Blob data and record entries specified for array!"); 326 // Emit a vbr6 to indicate the number of elements present. 327 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 328 329 // Emit each field. 330 for (unsigned i = 0; i != BlobLen; ++i) 331 EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); 332 333 // Know that blob data is consumed for assertion below. 334 BlobData = 0; 335 } else { 336 // Emit a vbr6 to indicate the number of elements present. 337 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 338 339 // Emit each field. 340 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) 341 EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); 342 } 343 } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { 344 // If this record has blob data, emit it, otherwise we must have record 345 // entries to encode this way. 346 347 // Emit a vbr6 to indicate the number of elements present. 348 if (BlobData) { 349 EmitVBR(static_cast<uint32_t>(BlobLen), 6); 350 assert(RecordIdx == Vals.size() && 351 "Blob data and record entries specified for blob operand!"); 352 } else { 353 EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); 354 } 355 356 // Flush to a 32-bit alignment boundary. 357 FlushToWord(); 358 assert((Out.size() & 3) == 0 && "Not 32-bit aligned"); 359 360 // Emit each field as a literal byte. 361 if (BlobData) { 362 for (unsigned i = 0; i != BlobLen; ++i) 363 Out.push_back((unsigned char)BlobData[i]); 364 365 // Know that blob data is consumed for assertion below. 366 BlobData = 0; 367 } else { 368 for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) { 369 assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob"); 370 Out.push_back((unsigned char)Vals[RecordIdx]); 371 } 372 } 373 // Align end to 32-bits. 374 while (Out.size() & 3) 375 Out.push_back(0); 376 377 } else { // Single scalar field. 378 assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); 379 EmitAbbreviatedField(Op, Vals[RecordIdx]); 380 ++RecordIdx; 381 } 382 } 383 assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); 384 assert(BlobData == 0 && 385 "Blob data specified for record that doesn't use it!"); 386 } 387 388 public: 389 390 /// EmitRecord - Emit the specified record to the stream, using an abbrev if 391 /// we have one to compress the output. 392 template<typename uintty> 393 void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals, 394 unsigned Abbrev = 0) { 395 if (!Abbrev) { 396 // If we don't have an abbrev to use, emit this in its fully unabbreviated 397 // form. 398 EmitCode(bitc::UNABBREV_RECORD); 399 EmitVBR(Code, 6); 400 EmitVBR(static_cast<uint32_t>(Vals.size()), 6); 401 for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i) 402 EmitVBR64(Vals[i], 6); 403 return; 404 } 405 406 // Insert the code into Vals to treat it uniformly. 407 Vals.insert(Vals.begin(), Code); 408 409 EmitRecordWithAbbrev(Abbrev, Vals); 410 } 411 412 /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. 413 /// Unlike EmitRecord, the code for the record should be included in Vals as 414 /// the first entry. 415 template<typename uintty> 416 void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) { 417 EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); 418 } 419 420 /// EmitRecordWithBlob - Emit the specified record to the stream, using an 421 /// abbrev that includes a blob at the end. The blob data to emit is 422 /// specified by the pointer and length specified at the end. In contrast to 423 /// EmitRecord, this routine expects that the first entry in Vals is the code 424 /// of the record. 425 template<typename uintty> 426 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 427 StringRef Blob) { 428 EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); 429 } 430 template<typename uintty> 431 void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 432 const char *BlobData, unsigned BlobLen) { 433 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); 434 } 435 436 /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records 437 /// that end with an array. 438 template<typename uintty> 439 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 440 StringRef Array) { 441 EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); 442 } 443 template<typename uintty> 444 void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, 445 const char *ArrayData, unsigned ArrayLen) { 446 return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, 447 ArrayLen)); 448 } 449 450 //===--------------------------------------------------------------------===// 451 // Abbrev Emission 452 //===--------------------------------------------------------------------===// 453 454 private: 455 // Emit the abbreviation as a DEFINE_ABBREV record. 456 void EncodeAbbrev(BitCodeAbbrev *Abbv) { 457 EmitCode(bitc::DEFINE_ABBREV); 458 EmitVBR(Abbv->getNumOperandInfos(), 5); 459 for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); 460 i != e; ++i) { 461 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 462 Emit(Op.isLiteral(), 1); 463 if (Op.isLiteral()) { 464 EmitVBR64(Op.getLiteralValue(), 8); 465 } else { 466 Emit(Op.getEncoding(), 3); 467 if (Op.hasEncodingData()) 468 EmitVBR64(Op.getEncodingData(), 5); 469 } 470 } 471 } 472 public: 473 474 /// EmitAbbrev - This emits an abbreviation to the stream. Note that this 475 /// method takes ownership of the specified abbrev. 476 unsigned EmitAbbrev(BitCodeAbbrev *Abbv) { 477 // Emit the abbreviation as a record. 478 EncodeAbbrev(Abbv); 479 CurAbbrevs.push_back(Abbv); 480 return static_cast<unsigned>(CurAbbrevs.size())-1 + 481 bitc::FIRST_APPLICATION_ABBREV; 482 } 483 484 //===--------------------------------------------------------------------===// 485 // BlockInfo Block Emission 486 //===--------------------------------------------------------------------===// 487 488 /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. 489 void EnterBlockInfoBlock(unsigned CodeWidth) { 490 EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); 491 BlockInfoCurBID = -1U; 492 } 493 private: 494 /// SwitchToBlockID - If we aren't already talking about the specified block 495 /// ID, emit a BLOCKINFO_CODE_SETBID record. 496 void SwitchToBlockID(unsigned BlockID) { 497 if (BlockInfoCurBID == BlockID) return; 498 SmallVector<unsigned, 2> V; 499 V.push_back(BlockID); 500 EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V); 501 BlockInfoCurBID = BlockID; 502 } 503 504 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 505 if (BlockInfo *BI = getBlockInfo(BlockID)) 506 return *BI; 507 508 // Otherwise, add a new record. 509 BlockInfoRecords.push_back(BlockInfo()); 510 BlockInfoRecords.back().BlockID = BlockID; 511 return BlockInfoRecords.back(); 512 } 513 514 public: 515 516 /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified 517 /// BlockID. 518 unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) { 519 SwitchToBlockID(BlockID); 520 EncodeAbbrev(Abbv); 521 522 // Add the abbrev to the specified block record. 523 BlockInfo &Info = getOrCreateBlockInfo(BlockID); 524 Info.Abbrevs.push_back(Abbv); 525 526 return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; 527 } 528 }; 529 530 531 } // End llvm namespace 532 533 #endif 534