Home | History | Annotate | Download | only in util
      1 // Copyright (c) 2017 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include <algorithm>
     16 #include <cassert>
     17 #include <cstring>
     18 #include <sstream>
     19 #include <type_traits>
     20 
     21 #include "util/bit_stream.h"
     22 
     23 namespace spvutils {
     24 
     25 namespace {
     26 
     27 // Returns if the system is little-endian. Unfortunately only works during
     28 // runtime.
     29 bool IsLittleEndian() {
     30   // This constant value allows the detection of the host machine's endianness.
     31   // Accessing it as an array of bytes is valid due to C++11 section 3.10
     32   // paragraph 10.
     33   static const uint16_t kFF00 = 0xff00;
     34   return reinterpret_cast<const unsigned char*>(&kFF00)[0] == 0;
     35 }
     36 
     37 // Copies bytes from the given buffer to a uint64_t buffer.
     38 // Motivation: casting uint64_t* to uint8_t* is ok. Casting in the other
     39 // direction is only advisable if uint8_t* is aligned to 64-bit word boundary.
     40 std::vector<uint64_t> ToBuffer64(const void* buffer, size_t num_bytes) {
     41   std::vector<uint64_t> out;
     42   out.resize((num_bytes + 7) / 8, 0);
     43   memcpy(out.data(), buffer, num_bytes);
     44   return out;
     45 }
     46 
     47 // Copies uint8_t buffer to a uint64_t buffer.
     48 std::vector<uint64_t> ToBuffer64(const std::vector<uint8_t>& in) {
     49   return ToBuffer64(in.data(), in.size());
     50 }
     51 
     52 // Returns uint64_t containing the same bits as |val|.
     53 // Type size must be less than 8 bytes.
     54 template <typename T>
     55 uint64_t ToU64(T val) {
     56   static_assert(sizeof(T) <= 8, "Type size too big");
     57   uint64_t val64 = 0;
     58   std::memcpy(&val64, &val, sizeof(T));
     59   return val64;
     60 }
     61 
     62 // Returns value of type T containing the same bits as |val64|.
     63 // Type size must be less than 8 bytes. Upper (unused) bits of |val64| must be
     64 // zero (irrelevant, but is checked with assertion).
     65 template <typename T>
     66 T FromU64(uint64_t val64) {
     67   assert(sizeof(T) == 8 || (val64 >> (sizeof(T) * 8)) == 0);
     68   static_assert(sizeof(T) <= 8, "Type size too big");
     69   T val = 0;
     70   std::memcpy(&val, &val64, sizeof(T));
     71   return val;
     72 }
     73 
     74 // Writes bits from |val| to |writer| in chunks of size |chunk_length|.
     75 // Signal bit is used to signal if the reader should expect another chunk:
     76 // 0 - no more chunks to follow
     77 // 1 - more chunks to follow
     78 // If number of written bits reaches |max_payload| last chunk is truncated.
     79 void WriteVariableWidthInternal(BitWriterInterface* writer, uint64_t val,
     80                                 size_t chunk_length, size_t max_payload) {
     81   assert(chunk_length > 0);
     82   assert(chunk_length < max_payload);
     83   assert(max_payload == 64 || (val >> max_payload) == 0);
     84 
     85   if (val == 0) {
     86     // Split in two writes for more readable logging.
     87     writer->WriteBits(0, chunk_length);
     88     writer->WriteBits(0, 1);
     89     return;
     90   }
     91 
     92   size_t payload_written = 0;
     93 
     94   while (val) {
     95     if (payload_written + chunk_length >= max_payload) {
     96       // This has to be the last chunk.
     97       // There is no need for the signal bit and the chunk can be truncated.
     98       const size_t left_to_write = max_payload - payload_written;
     99       assert((val >> left_to_write) == 0);
    100       writer->WriteBits(val, left_to_write);
    101       break;
    102     }
    103 
    104     writer->WriteBits(val, chunk_length);
    105     payload_written += chunk_length;
    106     val = val >> chunk_length;
    107 
    108     // Write a single bit to signal if there is more to come.
    109     writer->WriteBits(val ? 1 : 0, 1);
    110   }
    111 }
    112 
    113 // Reads data written with WriteVariableWidthInternal. |chunk_length| and
    114 // |max_payload| should be identical to those used to write the data.
    115 // Returns false if the stream ends prematurely.
    116 bool ReadVariableWidthInternal(BitReaderInterface* reader, uint64_t* val,
    117                                size_t chunk_length, size_t max_payload) {
    118   assert(chunk_length > 0);
    119   assert(chunk_length <= max_payload);
    120   size_t payload_read = 0;
    121 
    122   while (payload_read + chunk_length < max_payload) {
    123     uint64_t bits = 0;
    124     if (reader->ReadBits(&bits, chunk_length) != chunk_length)
    125       return false;
    126 
    127     *val |= bits << payload_read;
    128     payload_read += chunk_length;
    129 
    130     uint64_t more_to_come = 0;
    131     if (reader->ReadBits(&more_to_come, 1) != 1)
    132       return false;
    133 
    134     if (!more_to_come) {
    135       return true;
    136     }
    137   }
    138 
    139   // Need to read the last chunk which may be truncated. No signal bit follows.
    140   uint64_t bits = 0;
    141   const size_t left_to_read = max_payload - payload_read;
    142   if (reader->ReadBits(&bits, left_to_read) != left_to_read)
    143     return false;
    144 
    145   *val |= bits << payload_read;
    146   return true;
    147 }
    148 
    149 // Calls WriteVariableWidthInternal with the right max_payload argument.
    150 template <typename T>
    151 void WriteVariableWidthUnsigned(BitWriterInterface* writer, T val,
    152                                 size_t chunk_length) {
    153   static_assert(std::is_unsigned<T>::value, "Type must be unsigned");
    154   static_assert(std::is_integral<T>::value, "Type must be integral");
    155   WriteVariableWidthInternal(writer, val, chunk_length, sizeof(T) * 8);
    156 }
    157 
    158 // Calls ReadVariableWidthInternal with the right max_payload argument.
    159 template <typename T>
    160 bool ReadVariableWidthUnsigned(BitReaderInterface* reader, T* val,
    161                                size_t chunk_length) {
    162   static_assert(std::is_unsigned<T>::value, "Type must be unsigned");
    163   static_assert(std::is_integral<T>::value, "Type must be integral");
    164   uint64_t val64 = 0;
    165   if (!ReadVariableWidthInternal(reader, &val64, chunk_length, sizeof(T) * 8))
    166     return false;
    167   *val = static_cast<T>(val64);
    168   assert(*val == val64);
    169   return true;
    170 }
    171 
    172 // Encodes signed |val| to an unsigned value and calls
    173 // WriteVariableWidthInternal with the right max_payload argument.
    174 template <typename T>
    175 void WriteVariableWidthSigned(BitWriterInterface* writer, T val,
    176                               size_t chunk_length, size_t zigzag_exponent) {
    177   static_assert(std::is_signed<T>::value, "Type must be signed");
    178   static_assert(std::is_integral<T>::value, "Type must be integral");
    179   WriteVariableWidthInternal(writer, EncodeZigZag(val, zigzag_exponent),
    180                              chunk_length, sizeof(T) * 8);
    181 }
    182 
    183 // Calls ReadVariableWidthInternal with the right max_payload argument
    184 // and decodes the value.
    185 template <typename T>
    186 bool ReadVariableWidthSigned(BitReaderInterface* reader, T* val,
    187                              size_t chunk_length, size_t zigzag_exponent) {
    188   static_assert(std::is_signed<T>::value, "Type must be signed");
    189   static_assert(std::is_integral<T>::value, "Type must be integral");
    190   uint64_t encoded = 0;
    191   if (!ReadVariableWidthInternal(reader, &encoded, chunk_length, sizeof(T) * 8))
    192     return false;
    193 
    194   const int64_t decoded = DecodeZigZag(encoded, zigzag_exponent);
    195 
    196   *val = static_cast<T>(decoded);
    197   assert(*val == decoded);
    198   return true;
    199 }
    200 
    201 }  // namespace
    202 
    203 size_t Log2U64(uint64_t val) {
    204   size_t res = 0;
    205 
    206   if (val & 0xFFFFFFFF00000000) {
    207     val >>= 32;
    208     res |= 32;
    209   }
    210 
    211   if (val & 0xFFFF0000) {
    212     val >>= 16;
    213     res |= 16;
    214   }
    215 
    216   if (val & 0xFF00) {
    217     val >>= 8;
    218     res |= 8;
    219   }
    220 
    221   if (val & 0xF0) {
    222     val >>= 4;
    223     res |= 4;
    224   }
    225 
    226   if (val & 0xC) {
    227     val >>= 2;
    228     res |= 2;
    229   }
    230 
    231   if (val & 0x2) {
    232     res |= 1;
    233   }
    234 
    235   return res;
    236 }
    237 
    238 void BitWriterInterface::WriteVariableWidthU64(uint64_t val,
    239                                                size_t chunk_length) {
    240   WriteVariableWidthUnsigned(this, val, chunk_length);
    241 }
    242 
    243 void BitWriterInterface::WriteVariableWidthU32(uint32_t val,
    244                                                size_t chunk_length) {
    245   WriteVariableWidthUnsigned(this, val, chunk_length);
    246 }
    247 
    248 void BitWriterInterface::WriteVariableWidthU16(uint16_t val,
    249                                                size_t chunk_length) {
    250   WriteVariableWidthUnsigned(this, val, chunk_length);
    251 }
    252 
    253 void BitWriterInterface::WriteVariableWidthU8(uint8_t val,
    254                                               size_t chunk_length) {
    255   WriteVariableWidthUnsigned(this, val, chunk_length);
    256 }
    257 
    258 void BitWriterInterface::WriteVariableWidthS64(int64_t val,
    259                                                size_t chunk_length,
    260                                                size_t zigzag_exponent) {
    261   WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    262 }
    263 
    264 void BitWriterInterface::WriteVariableWidthS32(int32_t val,
    265                                                size_t chunk_length,
    266                                                size_t zigzag_exponent) {
    267   WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    268 }
    269 
    270 void BitWriterInterface::WriteVariableWidthS16(int16_t val,
    271                                                size_t chunk_length,
    272                                                size_t zigzag_exponent) {
    273   WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    274 }
    275 
    276 void BitWriterInterface::WriteVariableWidthS8(int8_t val,
    277                                               size_t chunk_length,
    278                                               size_t zigzag_exponent) {
    279   WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    280 }
    281 
    282 void BitWriterInterface::WriteFixedWidth(uint64_t val, uint64_t max_val) {
    283   if (val > max_val) {
    284     assert(0 && "WriteFixedWidth: value too wide");
    285     return;
    286   }
    287 
    288   const size_t num_bits = 1 + Log2U64(max_val);
    289   WriteBits(val, num_bits);
    290 }
    291 
    292 BitWriterWord64::BitWriterWord64(size_t reserve_bits) : end_(0) {
    293   buffer_.reserve(NumBitsToNumWords<64>(reserve_bits));
    294 }
    295 
    296 void BitWriterWord64::WriteBits(uint64_t bits, size_t num_bits) {
    297   // Check that |bits| and |num_bits| are valid and consistent.
    298   assert(num_bits <= 64);
    299   const bool is_little_endian = IsLittleEndian();
    300   assert(is_little_endian && "Big-endian architecture support not implemented");
    301   if (!is_little_endian) return;
    302 
    303   if (num_bits == 0) return;
    304 
    305   bits = GetLowerBits(bits, num_bits);
    306 
    307   EmitSequence(bits, num_bits);
    308 
    309   // Offset from the start of the current word.
    310   const size_t offset = end_ % 64;
    311 
    312   if (offset == 0) {
    313     // If no offset, simply add |bits| as a new word to the buffer_.
    314     buffer_.push_back(bits);
    315   } else {
    316     // Shift bits and add them to the current word after offset.
    317     const uint64_t first_word = bits << offset;
    318     buffer_.back() |= first_word;
    319 
    320     // If we don't overflow to the next word, there is nothing more to do.
    321 
    322     if (offset + num_bits > 64) {
    323       // We overflow to the next word.
    324       const uint64_t second_word = bits >> (64 - offset);
    325       // Add remaining bits as a new word to buffer_.
    326       buffer_.push_back(second_word);
    327     }
    328   }
    329 
    330   // Move end_ into position for next write.
    331   end_ += num_bits;
    332   assert(buffer_.size() * 64 >= end_);
    333 }
    334 
    335 bool BitReaderInterface::ReadVariableWidthU64(uint64_t* val,
    336                                               size_t chunk_length) {
    337   return ReadVariableWidthUnsigned(this, val, chunk_length);
    338 }
    339 
    340 bool BitReaderInterface::ReadVariableWidthU32(uint32_t* val,
    341                                               size_t chunk_length) {
    342   return ReadVariableWidthUnsigned(this, val, chunk_length);
    343 }
    344 
    345 bool BitReaderInterface::ReadVariableWidthU16(uint16_t* val,
    346                                               size_t chunk_length) {
    347   return ReadVariableWidthUnsigned(this, val, chunk_length);
    348 }
    349 
    350 bool BitReaderInterface::ReadVariableWidthU8(uint8_t* val,
    351                                              size_t chunk_length) {
    352   return ReadVariableWidthUnsigned(this, val, chunk_length);
    353 }
    354 
    355 bool BitReaderInterface::ReadVariableWidthS64(int64_t* val,
    356                                               size_t chunk_length,
    357                                               size_t zigzag_exponent) {
    358   return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    359 }
    360 
    361 bool BitReaderInterface::ReadVariableWidthS32(int32_t* val,
    362                                               size_t chunk_length,
    363                                               size_t zigzag_exponent) {
    364   return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    365 }
    366 
    367 bool BitReaderInterface::ReadVariableWidthS16(int16_t* val,
    368                                               size_t chunk_length,
    369                                               size_t zigzag_exponent) {
    370   return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    371 }
    372 
    373 bool BitReaderInterface::ReadVariableWidthS8(int8_t* val,
    374                                              size_t chunk_length,
    375                                              size_t zigzag_exponent) {
    376   return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
    377 }
    378 
    379 bool BitReaderInterface::ReadFixedWidth(uint64_t* val, uint64_t max_val) {
    380   const size_t num_bits = 1 + Log2U64(max_val);
    381   return ReadBits(val, num_bits) == num_bits;
    382 }
    383 
    384 BitReaderWord64::BitReaderWord64(std::vector<uint64_t>&& buffer)
    385     : buffer_(std::move(buffer)), pos_(0) {}
    386 
    387 BitReaderWord64::BitReaderWord64(const std::vector<uint8_t>& buffer)
    388     : buffer_(ToBuffer64(buffer)), pos_(0) {}
    389 
    390 BitReaderWord64::BitReaderWord64(const void* buffer, size_t num_bytes)
    391     : buffer_(ToBuffer64(buffer, num_bytes)), pos_(0) {}
    392 
    393 size_t BitReaderWord64::ReadBits(uint64_t* bits, size_t num_bits) {
    394   assert(num_bits <= 64);
    395   const bool is_little_endian = IsLittleEndian();
    396   assert(is_little_endian && "Big-endian architecture support not implemented");
    397   if (!is_little_endian) return 0;
    398 
    399   if (ReachedEnd())
    400     return 0;
    401 
    402   // Index of the current word.
    403   const size_t index = pos_ / 64;
    404 
    405   // Bit position in the current word where we start reading.
    406   const size_t offset = pos_ % 64;
    407 
    408   // Read all bits from the current word (it might be too much, but
    409   // excessive bits will be removed later).
    410   *bits = buffer_[index] >> offset;
    411 
    412   const size_t num_read_from_first_word = std::min(64 - offset, num_bits);
    413   pos_ += num_read_from_first_word;
    414 
    415   if (pos_ >= buffer_.size() * 64) {
    416     // Reached end of buffer_.
    417     return num_read_from_first_word;
    418   }
    419 
    420   if (offset + num_bits > 64) {
    421     // Requested |num_bits| overflows to next word.
    422     // Write all bits from the beginning of next word to *bits after offset.
    423     *bits |= buffer_[index + 1] << (64 - offset);
    424     pos_ += offset + num_bits - 64;
    425   }
    426 
    427   // We likely have written more bits than requested. Clear excessive bits.
    428   *bits = GetLowerBits(*bits, num_bits);
    429   return num_bits;
    430 }
    431 
    432 bool BitReaderWord64::ReachedEnd() const {
    433   return pos_ >= buffer_.size() * 64;
    434 }
    435 
    436 bool BitReaderWord64::OnlyZeroesLeft() const {
    437   if (ReachedEnd())
    438     return true;
    439 
    440   const size_t index = pos_ / 64;
    441   if (index < buffer_.size() - 1)
    442     return false;
    443 
    444   assert(index == buffer_.size() - 1);
    445 
    446   const size_t offset = pos_ % 64;
    447   const uint64_t remaining_bits = buffer_[index] >> offset;
    448   return !remaining_bits;
    449 }
    450 
    451 }  // namespace spvutils
    452