Home | History | Annotate | Download | only in util
      1 // Copyright (c) 2017 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Contains utils for reading, writing and debug printing bit streams.
     16 
     17 #ifndef LIBSPIRV_UTIL_BIT_STREAM_H_
     18 #define LIBSPIRV_UTIL_BIT_STREAM_H_
     19 
     20 #include <algorithm>
     21 #include <bitset>
     22 #include <cstdint>
     23 #include <functional>
     24 #include <string>
     25 #include <sstream>
     26 #include <vector>
     27 
     28 namespace spvutils {
     29 
     30 // Returns rounded down log2(val). log2(0) is considered 0.
     31 size_t Log2U64(uint64_t val);
     32 
     33 // Terminology:
     34 // Bits - usually used for a uint64 word, first bit is the lowest.
     35 // Stream - std::string of '0' and '1', read left-to-right,
     36 //          i.e. first bit is at the front and not at the end as in
     37 //          std::bitset::to_string().
     38 // Bitset - std::bitset corresponding to uint64 bits and to reverse(stream).
     39 
     40 // Converts number of bits to a respective number of chunks of size N.
     41 // For example NumBitsToNumWords<8> returns how many bytes are needed to store
     42 // |num_bits|.
     43 template <size_t N>
     44 inline size_t NumBitsToNumWords(size_t num_bits) {
     45   return (num_bits + (N - 1)) / N;
     46 }
     47 
     48 // Returns value of the same type as |in|, where all but the first |num_bits|
     49 // are set to zero.
     50 template <typename T>
     51 inline T GetLowerBits(T in, size_t num_bits) {
     52   return sizeof(T) * 8 == num_bits ? in : in & T((T(1) << num_bits) - T(1));
     53 }
     54 
     55 // Encodes signed integer as unsigned in zigzag order:
     56 //  0 -> 0
     57 // -1 -> 1
     58 //  1 -> 2
     59 // -2 -> 3
     60 //  2 -> 4
     61 // Motivation: -1 is 0xFF...FF what doesn't work very well with
     62 // WriteVariableWidth which prefers to have as many 0 bits as possible.
     63 inline uint64_t EncodeZigZag(int64_t val) {
     64   return (val << 1) ^ (val >> 63);
     65 }
     66 
     67 // Decodes signed integer encoded with EncodeZigZag.
     68 inline int64_t DecodeZigZag(uint64_t val) {
     69   if (val & 1) {
     70     // Negative.
     71     // 1 -> -1
     72     // 3 -> -2
     73     // 5 -> -3
     74     return -1 - (val >> 1);
     75   } else {
     76     // Non-negative.
     77     // 0 -> 0
     78     // 2 -> 1
     79     // 4 -> 2
     80     return val >> 1;
     81   }
     82 }
     83 
     84 // Encodes signed integer as unsigned. This is a generalized version of
     85 // EncodeZigZag, designed to favor small positive numbers.
     86 // Values are transformed in blocks of 2^|block_exponent|.
     87 // If |block_exponent| is zero, then this degenerates into normal EncodeZigZag.
     88 // Example when |block_exponent| is 1 (return value is the index):
     89 // 0, 1, -1, -2, 2, 3, -3, -4, 4, 5, -5, -6, 6, 7, -7, -8
     90 // Example when |block_exponent| is 2:
     91 // 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8
     92 inline uint64_t EncodeZigZag(int64_t val, size_t block_exponent) {
     93   assert(block_exponent < 64);
     94   const uint64_t uval = static_cast<uint64_t>(val >= 0 ? val : -val - 1);
     95   const uint64_t block_num = ((uval >> block_exponent) << 1) + (val >= 0 ? 0 : 1);
     96   const uint64_t pos = GetLowerBits(uval, block_exponent);
     97   return (block_num << block_exponent) + pos;
     98 }
     99 
    100 // Decodes signed integer encoded with EncodeZigZag. |block_exponent| must be
    101 // the same.
    102 inline int64_t DecodeZigZag(uint64_t val, size_t block_exponent) {
    103   assert(block_exponent < 64);
    104   const uint64_t block_num = val >> block_exponent;
    105   const uint64_t pos = GetLowerBits(val, block_exponent);
    106   if (block_num & 1) {
    107     // Negative.
    108     return -1LL - ((block_num >> 1) << block_exponent) - pos;
    109   } else {
    110     // Positive.
    111     return ((block_num >> 1) << block_exponent) + pos;
    112   }
    113 }
    114 
    115 // Converts |buffer| to a stream of '0' and '1'.
    116 template <typename T>
    117 std::string BufferToStream(const std::vector<T>& buffer) {
    118   std::stringstream ss;
    119   for (auto it = buffer.begin(); it != buffer.end(); ++it) {
    120     std::string str = std::bitset<sizeof(T) * 8>(*it).to_string();
    121     // Strings generated by std::bitset::to_string are read right to left.
    122     // Reversing to left to right.
    123     std::reverse(str.begin(), str.end());
    124     ss << str;
    125   }
    126   return ss.str();
    127 }
    128 
    129 // Converts a left-to-right input string of '0' and '1' to a buffer of |T|
    130 // words.
    131 template <typename T>
    132 std::vector<T> StreamToBuffer(std::string str) {
    133   // The input string is left-to-right, the input argument of std::bitset needs
    134   // to right-to-left. Instead of reversing tokens, reverse the entire string
    135   // and iterate tokens from end to begin.
    136   std::reverse(str.begin(), str.end());
    137   const int word_size = static_cast<int>(sizeof(T) * 8);
    138   const int str_length = static_cast<int>(str.length());
    139   std::vector<T> buffer;
    140   buffer.reserve(NumBitsToNumWords<sizeof(T)>(str.length()));
    141   for (int index = str_length - word_size; index >= 0; index -= word_size) {
    142     buffer.push_back(static_cast<T>(std::bitset<sizeof(T) * 8>(
    143         str, index, word_size).to_ullong()));
    144   }
    145   const size_t suffix_length = str.length() % word_size;
    146   if (suffix_length != 0) {
    147     buffer.push_back(static_cast<T>(std::bitset<sizeof(T) * 8>(
    148         str, 0, suffix_length).to_ullong()));
    149   }
    150   return buffer;
    151 }
    152 
    153 // Adds '0' chars at the end of the string until the size is a multiple of N.
    154 template <size_t N>
    155 inline std::string PadToWord(std::string&& str) {
    156   const size_t tail_length = str.size() % N;
    157   if (tail_length != 0)
    158     str += std::string(N - tail_length, '0');
    159   return str;
    160 }
    161 
    162 // Adds '0' chars at the end of the string until the size is a multiple of N.
    163 template <size_t N>
    164 inline std::string PadToWord(const std::string& str) {
    165   return PadToWord<N>(std::string(str));
    166 }
    167 
    168 // Converts a left-to-right stream of bits to std::bitset.
    169 template <size_t N>
    170 inline std::bitset<N> StreamToBitset(std::string str) {
    171   std::reverse(str.begin(), str.end());
    172   return std::bitset<N>(str);
    173 }
    174 
    175 // Converts first |num_bits| of std::bitset to a left-to-right stream of bits.
    176 template <size_t N>
    177 inline std::string BitsetToStream(const std::bitset<N>& bits, size_t num_bits = N) {
    178   std::string str = bits.to_string().substr(N - num_bits);
    179   std::reverse(str.begin(), str.end());
    180   return str;
    181 }
    182 
    183 // Converts a left-to-right stream of bits to uint64.
    184 inline uint64_t StreamToBits(std::string str) {
    185   std::reverse(str.begin(), str.end());
    186   return std::bitset<64>(str).to_ullong();
    187 }
    188 
    189 // Converts first |num_bits| stored in uint64 to a left-to-right stream of bits.
    190 inline std::string BitsToStream(uint64_t bits, size_t num_bits = 64) {
    191   std::bitset<64> bitset(bits);
    192   return BitsetToStream(bitset, num_bits);
    193 }
    194 
    195 // Base class for writing sequences of bits.
    196 class BitWriterInterface {
    197  public:
    198   BitWriterInterface() {}
    199   virtual ~BitWriterInterface() {}
    200 
    201   // Writes lower |num_bits| in |bits| to the stream.
    202   // |num_bits| must be no greater than 64.
    203   virtual void WriteBits(uint64_t bits, size_t num_bits) = 0;
    204 
    205   // Writes left-to-right string of '0' and '1' to stream.
    206   // String length must be no greater than 64.
    207   // Note: "01" will be writen as 0x2, not 0x1. The string doesn't represent
    208   // numbers but a stream of bits in the order they come from encoder.
    209   virtual void WriteStream(const std::string& bits) {
    210     WriteBits(StreamToBits(bits), bits.length());
    211   }
    212 
    213   // Writes lower |num_bits| in |bits| to the stream.
    214   // |num_bits| must be no greater than 64.
    215   template <size_t N>
    216   void WriteBitset(const std::bitset<N>& bits, size_t num_bits = N) {
    217     WriteBits(bits.to_ullong(), num_bits);
    218   }
    219 
    220   // Writes bits from value of type |T| to the stream. No encoding is done.
    221   // Always writes 8 * sizeof(T) bits.
    222   template <typename T>
    223   void WriteUnencoded(T val) {
    224     static_assert(sizeof(T) <= 64, "Type size too large");
    225     uint64_t bits = 0;
    226     memcpy(&bits, &val, sizeof(T));
    227     WriteBits(bits, sizeof(T) * 8);
    228   }
    229 
    230   // Writes |val| in chunks of size |chunk_length| followed by a signal bit:
    231   // 0 - no more chunks to follow
    232   // 1 - more chunks to follow
    233   // for example 255 is encoded into 1111 1 1111 0 for chunk length 4.
    234   // The last chunk can be truncated and signal bit omitted, if the entire
    235   // payload (for example 16 bit for uint16_t has already been written).
    236   void WriteVariableWidthU64(uint64_t val, size_t chunk_length);
    237   void WriteVariableWidthU32(uint32_t val, size_t chunk_length);
    238   void WriteVariableWidthU16(uint16_t val, size_t chunk_length);
    239   void WriteVariableWidthU8(uint8_t val, size_t chunk_length);
    240   void WriteVariableWidthS64(
    241       int64_t val, size_t chunk_length, size_t zigzag_exponent);
    242   void WriteVariableWidthS32(
    243       int32_t val, size_t chunk_length, size_t zigzag_exponent);
    244   void WriteVariableWidthS16(
    245       int16_t val, size_t chunk_length, size_t zigzag_exponent);
    246   void WriteVariableWidthS8(
    247       int8_t val, size_t chunk_length, size_t zigzag_exponent);
    248 
    249   // Writes |val| using fixed bit width. Bit width is determined by |max_val|:
    250   // max_val 0 -> bit width 1
    251   // max_val 1 -> bit width 1
    252   // max_val 2 -> bit width 2
    253   // max_val 3 -> bit width 2
    254   // max_val 4 -> bit width 3
    255   // max_val 5 -> bit width 3
    256   // max_val 8 -> bit width 4
    257   // max_val n -> bit width 1 + floor(log2(n))
    258   // |val| needs to be <= |max_val|.
    259   void WriteFixedWidth(uint64_t val, uint64_t max_val);
    260 
    261   // Returns number of bits written.
    262   virtual size_t GetNumBits() const = 0;
    263 
    264   // Provides direct access to the buffer data if implemented.
    265   virtual const uint8_t* GetData() const {
    266     return nullptr;
    267   }
    268 
    269   // Returns buffer size in bytes.
    270   size_t GetDataSizeBytes() const {
    271     return NumBitsToNumWords<8>(GetNumBits());
    272   }
    273 
    274   // Generates and returns byte array containing written bits.
    275   virtual std::vector<uint8_t> GetDataCopy() const = 0;
    276 
    277   BitWriterInterface(const BitWriterInterface&) = delete;
    278   BitWriterInterface& operator=(const BitWriterInterface&) = delete;
    279 };
    280 
    281 // This class is an implementation of BitWriterInterface, using
    282 // std::vector<uint64_t> to store written bits.
    283 class BitWriterWord64 : public BitWriterInterface {
    284  public:
    285   explicit BitWriterWord64(size_t reserve_bits = 64);
    286 
    287   void WriteBits(uint64_t bits, size_t num_bits) override;
    288 
    289   size_t GetNumBits() const override {
    290     return end_;
    291   }
    292 
    293   const uint8_t* GetData() const override {
    294     return reinterpret_cast<const uint8_t*>(buffer_.data());
    295   }
    296 
    297   std::vector<uint8_t> GetDataCopy() const override {
    298     return std::vector<uint8_t>(GetData(), GetData() + GetDataSizeBytes());
    299   }
    300 
    301   // Returns written stream as std::string, padded with zeroes so that the
    302   // length is a multiple of 64.
    303   std::string GetStreamPadded64() const {
    304     return BufferToStream(buffer_);
    305   }
    306 
    307   // Sets callback to emit bit sequences after every write.
    308   void SetCallback(std::function<void(const std::string&)> callback) {
    309     callback_ = callback;
    310   }
    311 
    312  protected:
    313   // Sends string generated from arguments to callback_ if defined.
    314   void EmitSequence(uint64_t bits, size_t num_bits) const {
    315     if (callback_)
    316       callback_(BitsToStream(bits, num_bits));
    317   }
    318 
    319  private:
    320   std::vector<uint64_t> buffer_;
    321   // Total number of bits written so far. Named 'end' as analogy to std::end().
    322   size_t end_;
    323 
    324   // If not null, the writer will use the callback to emit the written bit
    325   // sequence as a string of '0' and '1'.
    326   std::function<void(const std::string&)> callback_;
    327 };
    328 
    329 // Base class for reading sequences of bits.
    330 class BitReaderInterface {
    331  public:
    332   BitReaderInterface() {}
    333   virtual ~BitReaderInterface() {}
    334 
    335   // Reads |num_bits| from the stream, stores them in |bits|.
    336   // Returns number of read bits. |num_bits| must be no greater than 64.
    337   virtual size_t ReadBits(uint64_t* bits, size_t num_bits) = 0;
    338 
    339   // Reads |num_bits| from the stream, stores them in |bits|.
    340   // Returns number of read bits. |num_bits| must be no greater than 64.
    341   template <size_t N>
    342   size_t ReadBitset(std::bitset<N>* bits, size_t num_bits = N) {
    343     uint64_t val = 0;
    344     size_t num_read = ReadBits(&val, num_bits);
    345     if (num_read) {
    346       *bits = std::bitset<N>(val);
    347     }
    348     return num_read;
    349   }
    350 
    351   // Reads |num_bits| from the stream, returns string in left-to-right order.
    352   // The length of the returned string may be less than |num_bits| if end was
    353   // reached.
    354   std::string ReadStream(size_t num_bits) {
    355     uint64_t bits = 0;
    356     size_t num_read = ReadBits(&bits, num_bits);
    357     return BitsToStream(bits, num_read);
    358   }
    359 
    360   // Reads 8 * sizeof(T) bits and stores them in |val|.
    361   template <typename T>
    362   bool ReadUnencoded(T* val) {
    363     static_assert(sizeof(T) <= 64, "Type size too large");
    364     uint64_t bits = 0;
    365     const size_t num_read = ReadBits(&bits, sizeof(T) * 8);
    366     if (num_read != sizeof(T) * 8)
    367       return false;
    368     memcpy(val, &bits, sizeof(T));
    369     return true;
    370   }
    371 
    372   // Returns number of bits already read.
    373   virtual size_t GetNumReadBits() const = 0;
    374 
    375   // These two functions define 'hard' and 'soft' EOF.
    376   //
    377   // Returns true if the end of the buffer was reached.
    378   virtual bool ReachedEnd() const = 0;
    379   // Returns true if we reached the end of the buffer or are nearing it and only
    380   // zero bits are left to read. Implementations of this function are allowed to
    381   // commit a "false negative" error if the end of the buffer was not reached,
    382   // i.e. it can return false even if indeed only zeroes are left.
    383   // It is assumed that the consumer expects that
    384   // the buffer stream ends with padding zeroes, and would accept this as a
    385   // 'soft' EOF. Implementations of this class do not necessarily need to
    386   // implement this, default behavior can simply delegate to ReachedEnd().
    387   virtual bool OnlyZeroesLeft() const {
    388     return ReachedEnd();
    389   }
    390 
    391   // Reads value encoded with WriteVariableWidthXXX (see BitWriterInterface).
    392   // Reader and writer must use the same |chunk_length| and variable type.
    393   // Returns true on success, false if the bit stream ends prematurely.
    394   bool ReadVariableWidthU64(uint64_t* val, size_t chunk_length);
    395   bool ReadVariableWidthU32(uint32_t* val, size_t chunk_length);
    396   bool ReadVariableWidthU16(uint16_t* val, size_t chunk_length);
    397   bool ReadVariableWidthU8(uint8_t* val, size_t chunk_length);
    398   bool ReadVariableWidthS64(
    399       int64_t* val, size_t chunk_length, size_t zigzag_exponent);
    400   bool ReadVariableWidthS32(
    401       int32_t* val, size_t chunk_length, size_t zigzag_exponent);
    402   bool ReadVariableWidthS16(
    403       int16_t* val, size_t chunk_length, size_t zigzag_exponent);
    404   bool ReadVariableWidthS8(
    405       int8_t* val, size_t chunk_length, size_t zigzag_exponent);
    406 
    407   // Reads value written by WriteFixedWidth (|max_val| needs to be the same).
    408   // Returns true on success, false if the bit stream ends prematurely.
    409   bool ReadFixedWidth(uint64_t* val, uint64_t max_val);
    410 
    411   BitReaderInterface(const BitReaderInterface&) = delete;
    412   BitReaderInterface& operator=(const BitReaderInterface&) = delete;
    413 };
    414 
    415 // This class is an implementation of BitReaderInterface which accepts both
    416 // uint8_t and uint64_t buffers as input. uint64_t buffers are consumed and
    417 // owned. uint8_t buffers are copied.
    418 class BitReaderWord64 : public BitReaderInterface {
    419  public:
    420   // Consumes and owns the buffer.
    421   explicit BitReaderWord64(std::vector<uint64_t>&& buffer);
    422 
    423   // Copies the buffer and casts it to uint64.
    424   // Consuming the original buffer and casting it to uint64 is difficult,
    425   // as it would potentially cause data misalignment and poor performance.
    426   explicit BitReaderWord64(const std::vector<uint8_t>& buffer);
    427   BitReaderWord64(const void* buffer, size_t num_bytes);
    428 
    429   size_t ReadBits(uint64_t* bits, size_t num_bits) override;
    430 
    431   size_t GetNumReadBits() const override {
    432     return pos_;
    433   }
    434 
    435   bool ReachedEnd() const override;
    436   bool OnlyZeroesLeft() const override;
    437 
    438   BitReaderWord64() = delete;
    439  private:
    440   const std::vector<uint64_t> buffer_;
    441   size_t pos_;
    442 };
    443 
    444 }  // namespace spvutils
    445 
    446 #endif  // LIBSPIRV_UTIL_BIT_STREAM_H_
    447