1 // Copyright (c) 2017 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Contains utils for reading, writing and debug printing bit streams. 16 17 #ifndef LIBSPIRV_UTIL_BIT_STREAM_H_ 18 #define LIBSPIRV_UTIL_BIT_STREAM_H_ 19 20 #include <algorithm> 21 #include <bitset> 22 #include <cstdint> 23 #include <functional> 24 #include <string> 25 #include <sstream> 26 #include <vector> 27 28 namespace spvutils { 29 30 // Returns rounded down log2(val). log2(0) is considered 0. 31 size_t Log2U64(uint64_t val); 32 33 // Terminology: 34 // Bits - usually used for a uint64 word, first bit is the lowest. 35 // Stream - std::string of '0' and '1', read left-to-right, 36 // i.e. first bit is at the front and not at the end as in 37 // std::bitset::to_string(). 38 // Bitset - std::bitset corresponding to uint64 bits and to reverse(stream). 39 40 // Converts number of bits to a respective number of chunks of size N. 41 // For example NumBitsToNumWords<8> returns how many bytes are needed to store 42 // |num_bits|. 43 template <size_t N> 44 inline size_t NumBitsToNumWords(size_t num_bits) { 45 return (num_bits + (N - 1)) / N; 46 } 47 48 // Returns value of the same type as |in|, where all but the first |num_bits| 49 // are set to zero. 50 template <typename T> 51 inline T GetLowerBits(T in, size_t num_bits) { 52 return sizeof(T) * 8 == num_bits ? in : in & T((T(1) << num_bits) - T(1)); 53 } 54 55 // Encodes signed integer as unsigned in zigzag order: 56 // 0 -> 0 57 // -1 -> 1 58 // 1 -> 2 59 // -2 -> 3 60 // 2 -> 4 61 // Motivation: -1 is 0xFF...FF what doesn't work very well with 62 // WriteVariableWidth which prefers to have as many 0 bits as possible. 63 inline uint64_t EncodeZigZag(int64_t val) { 64 return (val << 1) ^ (val >> 63); 65 } 66 67 // Decodes signed integer encoded with EncodeZigZag. 68 inline int64_t DecodeZigZag(uint64_t val) { 69 if (val & 1) { 70 // Negative. 71 // 1 -> -1 72 // 3 -> -2 73 // 5 -> -3 74 return -1 - (val >> 1); 75 } else { 76 // Non-negative. 77 // 0 -> 0 78 // 2 -> 1 79 // 4 -> 2 80 return val >> 1; 81 } 82 } 83 84 // Encodes signed integer as unsigned. This is a generalized version of 85 // EncodeZigZag, designed to favor small positive numbers. 86 // Values are transformed in blocks of 2^|block_exponent|. 87 // If |block_exponent| is zero, then this degenerates into normal EncodeZigZag. 88 // Example when |block_exponent| is 1 (return value is the index): 89 // 0, 1, -1, -2, 2, 3, -3, -4, 4, 5, -5, -6, 6, 7, -7, -8 90 // Example when |block_exponent| is 2: 91 // 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8 92 inline uint64_t EncodeZigZag(int64_t val, size_t block_exponent) { 93 assert(block_exponent < 64); 94 const uint64_t uval = static_cast<uint64_t>(val >= 0 ? val : -val - 1); 95 const uint64_t block_num = ((uval >> block_exponent) << 1) + (val >= 0 ? 0 : 1); 96 const uint64_t pos = GetLowerBits(uval, block_exponent); 97 return (block_num << block_exponent) + pos; 98 } 99 100 // Decodes signed integer encoded with EncodeZigZag. |block_exponent| must be 101 // the same. 102 inline int64_t DecodeZigZag(uint64_t val, size_t block_exponent) { 103 assert(block_exponent < 64); 104 const uint64_t block_num = val >> block_exponent; 105 const uint64_t pos = GetLowerBits(val, block_exponent); 106 if (block_num & 1) { 107 // Negative. 108 return -1LL - ((block_num >> 1) << block_exponent) - pos; 109 } else { 110 // Positive. 111 return ((block_num >> 1) << block_exponent) + pos; 112 } 113 } 114 115 // Converts |buffer| to a stream of '0' and '1'. 116 template <typename T> 117 std::string BufferToStream(const std::vector<T>& buffer) { 118 std::stringstream ss; 119 for (auto it = buffer.begin(); it != buffer.end(); ++it) { 120 std::string str = std::bitset<sizeof(T) * 8>(*it).to_string(); 121 // Strings generated by std::bitset::to_string are read right to left. 122 // Reversing to left to right. 123 std::reverse(str.begin(), str.end()); 124 ss << str; 125 } 126 return ss.str(); 127 } 128 129 // Converts a left-to-right input string of '0' and '1' to a buffer of |T| 130 // words. 131 template <typename T> 132 std::vector<T> StreamToBuffer(std::string str) { 133 // The input string is left-to-right, the input argument of std::bitset needs 134 // to right-to-left. Instead of reversing tokens, reverse the entire string 135 // and iterate tokens from end to begin. 136 std::reverse(str.begin(), str.end()); 137 const int word_size = static_cast<int>(sizeof(T) * 8); 138 const int str_length = static_cast<int>(str.length()); 139 std::vector<T> buffer; 140 buffer.reserve(NumBitsToNumWords<sizeof(T)>(str.length())); 141 for (int index = str_length - word_size; index >= 0; index -= word_size) { 142 buffer.push_back(static_cast<T>(std::bitset<sizeof(T) * 8>( 143 str, index, word_size).to_ullong())); 144 } 145 const size_t suffix_length = str.length() % word_size; 146 if (suffix_length != 0) { 147 buffer.push_back(static_cast<T>(std::bitset<sizeof(T) * 8>( 148 str, 0, suffix_length).to_ullong())); 149 } 150 return buffer; 151 } 152 153 // Adds '0' chars at the end of the string until the size is a multiple of N. 154 template <size_t N> 155 inline std::string PadToWord(std::string&& str) { 156 const size_t tail_length = str.size() % N; 157 if (tail_length != 0) 158 str += std::string(N - tail_length, '0'); 159 return str; 160 } 161 162 // Adds '0' chars at the end of the string until the size is a multiple of N. 163 template <size_t N> 164 inline std::string PadToWord(const std::string& str) { 165 return PadToWord<N>(std::string(str)); 166 } 167 168 // Converts a left-to-right stream of bits to std::bitset. 169 template <size_t N> 170 inline std::bitset<N> StreamToBitset(std::string str) { 171 std::reverse(str.begin(), str.end()); 172 return std::bitset<N>(str); 173 } 174 175 // Converts first |num_bits| of std::bitset to a left-to-right stream of bits. 176 template <size_t N> 177 inline std::string BitsetToStream(const std::bitset<N>& bits, size_t num_bits = N) { 178 std::string str = bits.to_string().substr(N - num_bits); 179 std::reverse(str.begin(), str.end()); 180 return str; 181 } 182 183 // Converts a left-to-right stream of bits to uint64. 184 inline uint64_t StreamToBits(std::string str) { 185 std::reverse(str.begin(), str.end()); 186 return std::bitset<64>(str).to_ullong(); 187 } 188 189 // Converts first |num_bits| stored in uint64 to a left-to-right stream of bits. 190 inline std::string BitsToStream(uint64_t bits, size_t num_bits = 64) { 191 std::bitset<64> bitset(bits); 192 return BitsetToStream(bitset, num_bits); 193 } 194 195 // Base class for writing sequences of bits. 196 class BitWriterInterface { 197 public: 198 BitWriterInterface() {} 199 virtual ~BitWriterInterface() {} 200 201 // Writes lower |num_bits| in |bits| to the stream. 202 // |num_bits| must be no greater than 64. 203 virtual void WriteBits(uint64_t bits, size_t num_bits) = 0; 204 205 // Writes left-to-right string of '0' and '1' to stream. 206 // String length must be no greater than 64. 207 // Note: "01" will be writen as 0x2, not 0x1. The string doesn't represent 208 // numbers but a stream of bits in the order they come from encoder. 209 virtual void WriteStream(const std::string& bits) { 210 WriteBits(StreamToBits(bits), bits.length()); 211 } 212 213 // Writes lower |num_bits| in |bits| to the stream. 214 // |num_bits| must be no greater than 64. 215 template <size_t N> 216 void WriteBitset(const std::bitset<N>& bits, size_t num_bits = N) { 217 WriteBits(bits.to_ullong(), num_bits); 218 } 219 220 // Writes bits from value of type |T| to the stream. No encoding is done. 221 // Always writes 8 * sizeof(T) bits. 222 template <typename T> 223 void WriteUnencoded(T val) { 224 static_assert(sizeof(T) <= 64, "Type size too large"); 225 uint64_t bits = 0; 226 memcpy(&bits, &val, sizeof(T)); 227 WriteBits(bits, sizeof(T) * 8); 228 } 229 230 // Writes |val| in chunks of size |chunk_length| followed by a signal bit: 231 // 0 - no more chunks to follow 232 // 1 - more chunks to follow 233 // for example 255 is encoded into 1111 1 1111 0 for chunk length 4. 234 // The last chunk can be truncated and signal bit omitted, if the entire 235 // payload (for example 16 bit for uint16_t has already been written). 236 void WriteVariableWidthU64(uint64_t val, size_t chunk_length); 237 void WriteVariableWidthU32(uint32_t val, size_t chunk_length); 238 void WriteVariableWidthU16(uint16_t val, size_t chunk_length); 239 void WriteVariableWidthU8(uint8_t val, size_t chunk_length); 240 void WriteVariableWidthS64( 241 int64_t val, size_t chunk_length, size_t zigzag_exponent); 242 void WriteVariableWidthS32( 243 int32_t val, size_t chunk_length, size_t zigzag_exponent); 244 void WriteVariableWidthS16( 245 int16_t val, size_t chunk_length, size_t zigzag_exponent); 246 void WriteVariableWidthS8( 247 int8_t val, size_t chunk_length, size_t zigzag_exponent); 248 249 // Writes |val| using fixed bit width. Bit width is determined by |max_val|: 250 // max_val 0 -> bit width 1 251 // max_val 1 -> bit width 1 252 // max_val 2 -> bit width 2 253 // max_val 3 -> bit width 2 254 // max_val 4 -> bit width 3 255 // max_val 5 -> bit width 3 256 // max_val 8 -> bit width 4 257 // max_val n -> bit width 1 + floor(log2(n)) 258 // |val| needs to be <= |max_val|. 259 void WriteFixedWidth(uint64_t val, uint64_t max_val); 260 261 // Returns number of bits written. 262 virtual size_t GetNumBits() const = 0; 263 264 // Provides direct access to the buffer data if implemented. 265 virtual const uint8_t* GetData() const { 266 return nullptr; 267 } 268 269 // Returns buffer size in bytes. 270 size_t GetDataSizeBytes() const { 271 return NumBitsToNumWords<8>(GetNumBits()); 272 } 273 274 // Generates and returns byte array containing written bits. 275 virtual std::vector<uint8_t> GetDataCopy() const = 0; 276 277 BitWriterInterface(const BitWriterInterface&) = delete; 278 BitWriterInterface& operator=(const BitWriterInterface&) = delete; 279 }; 280 281 // This class is an implementation of BitWriterInterface, using 282 // std::vector<uint64_t> to store written bits. 283 class BitWriterWord64 : public BitWriterInterface { 284 public: 285 explicit BitWriterWord64(size_t reserve_bits = 64); 286 287 void WriteBits(uint64_t bits, size_t num_bits) override; 288 289 size_t GetNumBits() const override { 290 return end_; 291 } 292 293 const uint8_t* GetData() const override { 294 return reinterpret_cast<const uint8_t*>(buffer_.data()); 295 } 296 297 std::vector<uint8_t> GetDataCopy() const override { 298 return std::vector<uint8_t>(GetData(), GetData() + GetDataSizeBytes()); 299 } 300 301 // Returns written stream as std::string, padded with zeroes so that the 302 // length is a multiple of 64. 303 std::string GetStreamPadded64() const { 304 return BufferToStream(buffer_); 305 } 306 307 // Sets callback to emit bit sequences after every write. 308 void SetCallback(std::function<void(const std::string&)> callback) { 309 callback_ = callback; 310 } 311 312 protected: 313 // Sends string generated from arguments to callback_ if defined. 314 void EmitSequence(uint64_t bits, size_t num_bits) const { 315 if (callback_) 316 callback_(BitsToStream(bits, num_bits)); 317 } 318 319 private: 320 std::vector<uint64_t> buffer_; 321 // Total number of bits written so far. Named 'end' as analogy to std::end(). 322 size_t end_; 323 324 // If not null, the writer will use the callback to emit the written bit 325 // sequence as a string of '0' and '1'. 326 std::function<void(const std::string&)> callback_; 327 }; 328 329 // Base class for reading sequences of bits. 330 class BitReaderInterface { 331 public: 332 BitReaderInterface() {} 333 virtual ~BitReaderInterface() {} 334 335 // Reads |num_bits| from the stream, stores them in |bits|. 336 // Returns number of read bits. |num_bits| must be no greater than 64. 337 virtual size_t ReadBits(uint64_t* bits, size_t num_bits) = 0; 338 339 // Reads |num_bits| from the stream, stores them in |bits|. 340 // Returns number of read bits. |num_bits| must be no greater than 64. 341 template <size_t N> 342 size_t ReadBitset(std::bitset<N>* bits, size_t num_bits = N) { 343 uint64_t val = 0; 344 size_t num_read = ReadBits(&val, num_bits); 345 if (num_read) { 346 *bits = std::bitset<N>(val); 347 } 348 return num_read; 349 } 350 351 // Reads |num_bits| from the stream, returns string in left-to-right order. 352 // The length of the returned string may be less than |num_bits| if end was 353 // reached. 354 std::string ReadStream(size_t num_bits) { 355 uint64_t bits = 0; 356 size_t num_read = ReadBits(&bits, num_bits); 357 return BitsToStream(bits, num_read); 358 } 359 360 // Reads 8 * sizeof(T) bits and stores them in |val|. 361 template <typename T> 362 bool ReadUnencoded(T* val) { 363 static_assert(sizeof(T) <= 64, "Type size too large"); 364 uint64_t bits = 0; 365 const size_t num_read = ReadBits(&bits, sizeof(T) * 8); 366 if (num_read != sizeof(T) * 8) 367 return false; 368 memcpy(val, &bits, sizeof(T)); 369 return true; 370 } 371 372 // Returns number of bits already read. 373 virtual size_t GetNumReadBits() const = 0; 374 375 // These two functions define 'hard' and 'soft' EOF. 376 // 377 // Returns true if the end of the buffer was reached. 378 virtual bool ReachedEnd() const = 0; 379 // Returns true if we reached the end of the buffer or are nearing it and only 380 // zero bits are left to read. Implementations of this function are allowed to 381 // commit a "false negative" error if the end of the buffer was not reached, 382 // i.e. it can return false even if indeed only zeroes are left. 383 // It is assumed that the consumer expects that 384 // the buffer stream ends with padding zeroes, and would accept this as a 385 // 'soft' EOF. Implementations of this class do not necessarily need to 386 // implement this, default behavior can simply delegate to ReachedEnd(). 387 virtual bool OnlyZeroesLeft() const { 388 return ReachedEnd(); 389 } 390 391 // Reads value encoded with WriteVariableWidthXXX (see BitWriterInterface). 392 // Reader and writer must use the same |chunk_length| and variable type. 393 // Returns true on success, false if the bit stream ends prematurely. 394 bool ReadVariableWidthU64(uint64_t* val, size_t chunk_length); 395 bool ReadVariableWidthU32(uint32_t* val, size_t chunk_length); 396 bool ReadVariableWidthU16(uint16_t* val, size_t chunk_length); 397 bool ReadVariableWidthU8(uint8_t* val, size_t chunk_length); 398 bool ReadVariableWidthS64( 399 int64_t* val, size_t chunk_length, size_t zigzag_exponent); 400 bool ReadVariableWidthS32( 401 int32_t* val, size_t chunk_length, size_t zigzag_exponent); 402 bool ReadVariableWidthS16( 403 int16_t* val, size_t chunk_length, size_t zigzag_exponent); 404 bool ReadVariableWidthS8( 405 int8_t* val, size_t chunk_length, size_t zigzag_exponent); 406 407 // Reads value written by WriteFixedWidth (|max_val| needs to be the same). 408 // Returns true on success, false if the bit stream ends prematurely. 409 bool ReadFixedWidth(uint64_t* val, uint64_t max_val); 410 411 BitReaderInterface(const BitReaderInterface&) = delete; 412 BitReaderInterface& operator=(const BitReaderInterface&) = delete; 413 }; 414 415 // This class is an implementation of BitReaderInterface which accepts both 416 // uint8_t and uint64_t buffers as input. uint64_t buffers are consumed and 417 // owned. uint8_t buffers are copied. 418 class BitReaderWord64 : public BitReaderInterface { 419 public: 420 // Consumes and owns the buffer. 421 explicit BitReaderWord64(std::vector<uint64_t>&& buffer); 422 423 // Copies the buffer and casts it to uint64. 424 // Consuming the original buffer and casting it to uint64 is difficult, 425 // as it would potentially cause data misalignment and poor performance. 426 explicit BitReaderWord64(const std::vector<uint8_t>& buffer); 427 BitReaderWord64(const void* buffer, size_t num_bytes); 428 429 size_t ReadBits(uint64_t* bits, size_t num_bits) override; 430 431 size_t GetNumReadBits() const override { 432 return pos_; 433 } 434 435 bool ReachedEnd() const override; 436 bool OnlyZeroesLeft() const override; 437 438 BitReaderWord64() = delete; 439 private: 440 const std::vector<uint64_t> buffer_; 441 size_t pos_; 442 }; 443 444 } // namespace spvutils 445 446 #endif // LIBSPIRV_UTIL_BIT_STREAM_H_ 447