1 // Copyright (c) 2017 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <algorithm> 16 #include <cassert> 17 #include <cstring> 18 #include <sstream> 19 #include <type_traits> 20 21 #include "util/bit_stream.h" 22 23 namespace spvutils { 24 25 namespace { 26 27 // Returns if the system is little-endian. Unfortunately only works during 28 // runtime. 29 bool IsLittleEndian() { 30 // This constant value allows the detection of the host machine's endianness. 31 // Accessing it as an array of bytes is valid due to C++11 section 3.10 32 // paragraph 10. 33 static const uint16_t kFF00 = 0xff00; 34 return reinterpret_cast<const unsigned char*>(&kFF00)[0] == 0; 35 } 36 37 // Copies bytes from the given buffer to a uint64_t buffer. 38 // Motivation: casting uint64_t* to uint8_t* is ok. Casting in the other 39 // direction is only advisable if uint8_t* is aligned to 64-bit word boundary. 40 std::vector<uint64_t> ToBuffer64(const void* buffer, size_t num_bytes) { 41 std::vector<uint64_t> out; 42 out.resize((num_bytes + 7) / 8, 0); 43 memcpy(out.data(), buffer, num_bytes); 44 return out; 45 } 46 47 // Copies uint8_t buffer to a uint64_t buffer. 48 std::vector<uint64_t> ToBuffer64(const std::vector<uint8_t>& in) { 49 return ToBuffer64(in.data(), in.size()); 50 } 51 52 // Returns uint64_t containing the same bits as |val|. 53 // Type size must be less than 8 bytes. 54 template <typename T> 55 uint64_t ToU64(T val) { 56 static_assert(sizeof(T) <= 8, "Type size too big"); 57 uint64_t val64 = 0; 58 std::memcpy(&val64, &val, sizeof(T)); 59 return val64; 60 } 61 62 // Returns value of type T containing the same bits as |val64|. 63 // Type size must be less than 8 bytes. Upper (unused) bits of |val64| must be 64 // zero (irrelevant, but is checked with assertion). 65 template <typename T> 66 T FromU64(uint64_t val64) { 67 assert(sizeof(T) == 8 || (val64 >> (sizeof(T) * 8)) == 0); 68 static_assert(sizeof(T) <= 8, "Type size too big"); 69 T val = 0; 70 std::memcpy(&val, &val64, sizeof(T)); 71 return val; 72 } 73 74 // Writes bits from |val| to |writer| in chunks of size |chunk_length|. 75 // Signal bit is used to signal if the reader should expect another chunk: 76 // 0 - no more chunks to follow 77 // 1 - more chunks to follow 78 // If number of written bits reaches |max_payload| last chunk is truncated. 79 void WriteVariableWidthInternal(BitWriterInterface* writer, uint64_t val, 80 size_t chunk_length, size_t max_payload) { 81 assert(chunk_length > 0); 82 assert(chunk_length < max_payload); 83 assert(max_payload == 64 || (val >> max_payload) == 0); 84 85 if (val == 0) { 86 // Split in two writes for more readable logging. 87 writer->WriteBits(0, chunk_length); 88 writer->WriteBits(0, 1); 89 return; 90 } 91 92 size_t payload_written = 0; 93 94 while (val) { 95 if (payload_written + chunk_length >= max_payload) { 96 // This has to be the last chunk. 97 // There is no need for the signal bit and the chunk can be truncated. 98 const size_t left_to_write = max_payload - payload_written; 99 assert((val >> left_to_write) == 0); 100 writer->WriteBits(val, left_to_write); 101 break; 102 } 103 104 writer->WriteBits(val, chunk_length); 105 payload_written += chunk_length; 106 val = val >> chunk_length; 107 108 // Write a single bit to signal if there is more to come. 109 writer->WriteBits(val ? 1 : 0, 1); 110 } 111 } 112 113 // Reads data written with WriteVariableWidthInternal. |chunk_length| and 114 // |max_payload| should be identical to those used to write the data. 115 // Returns false if the stream ends prematurely. 116 bool ReadVariableWidthInternal(BitReaderInterface* reader, uint64_t* val, 117 size_t chunk_length, size_t max_payload) { 118 assert(chunk_length > 0); 119 assert(chunk_length <= max_payload); 120 size_t payload_read = 0; 121 122 while (payload_read + chunk_length < max_payload) { 123 uint64_t bits = 0; 124 if (reader->ReadBits(&bits, chunk_length) != chunk_length) 125 return false; 126 127 *val |= bits << payload_read; 128 payload_read += chunk_length; 129 130 uint64_t more_to_come = 0; 131 if (reader->ReadBits(&more_to_come, 1) != 1) 132 return false; 133 134 if (!more_to_come) { 135 return true; 136 } 137 } 138 139 // Need to read the last chunk which may be truncated. No signal bit follows. 140 uint64_t bits = 0; 141 const size_t left_to_read = max_payload - payload_read; 142 if (reader->ReadBits(&bits, left_to_read) != left_to_read) 143 return false; 144 145 *val |= bits << payload_read; 146 return true; 147 } 148 149 // Calls WriteVariableWidthInternal with the right max_payload argument. 150 template <typename T> 151 void WriteVariableWidthUnsigned(BitWriterInterface* writer, T val, 152 size_t chunk_length) { 153 static_assert(std::is_unsigned<T>::value, "Type must be unsigned"); 154 static_assert(std::is_integral<T>::value, "Type must be integral"); 155 WriteVariableWidthInternal(writer, val, chunk_length, sizeof(T) * 8); 156 } 157 158 // Calls ReadVariableWidthInternal with the right max_payload argument. 159 template <typename T> 160 bool ReadVariableWidthUnsigned(BitReaderInterface* reader, T* val, 161 size_t chunk_length) { 162 static_assert(std::is_unsigned<T>::value, "Type must be unsigned"); 163 static_assert(std::is_integral<T>::value, "Type must be integral"); 164 uint64_t val64 = 0; 165 if (!ReadVariableWidthInternal(reader, &val64, chunk_length, sizeof(T) * 8)) 166 return false; 167 *val = static_cast<T>(val64); 168 assert(*val == val64); 169 return true; 170 } 171 172 // Encodes signed |val| to an unsigned value and calls 173 // WriteVariableWidthInternal with the right max_payload argument. 174 template <typename T> 175 void WriteVariableWidthSigned(BitWriterInterface* writer, T val, 176 size_t chunk_length, size_t zigzag_exponent) { 177 static_assert(std::is_signed<T>::value, "Type must be signed"); 178 static_assert(std::is_integral<T>::value, "Type must be integral"); 179 WriteVariableWidthInternal(writer, EncodeZigZag(val, zigzag_exponent), 180 chunk_length, sizeof(T) * 8); 181 } 182 183 // Calls ReadVariableWidthInternal with the right max_payload argument 184 // and decodes the value. 185 template <typename T> 186 bool ReadVariableWidthSigned(BitReaderInterface* reader, T* val, 187 size_t chunk_length, size_t zigzag_exponent) { 188 static_assert(std::is_signed<T>::value, "Type must be signed"); 189 static_assert(std::is_integral<T>::value, "Type must be integral"); 190 uint64_t encoded = 0; 191 if (!ReadVariableWidthInternal(reader, &encoded, chunk_length, sizeof(T) * 8)) 192 return false; 193 194 const int64_t decoded = DecodeZigZag(encoded, zigzag_exponent); 195 196 *val = static_cast<T>(decoded); 197 assert(*val == decoded); 198 return true; 199 } 200 201 } // namespace 202 203 size_t Log2U64(uint64_t val) { 204 size_t res = 0; 205 206 if (val & 0xFFFFFFFF00000000) { 207 val >>= 32; 208 res |= 32; 209 } 210 211 if (val & 0xFFFF0000) { 212 val >>= 16; 213 res |= 16; 214 } 215 216 if (val & 0xFF00) { 217 val >>= 8; 218 res |= 8; 219 } 220 221 if (val & 0xF0) { 222 val >>= 4; 223 res |= 4; 224 } 225 226 if (val & 0xC) { 227 val >>= 2; 228 res |= 2; 229 } 230 231 if (val & 0x2) { 232 res |= 1; 233 } 234 235 return res; 236 } 237 238 void BitWriterInterface::WriteVariableWidthU64(uint64_t val, 239 size_t chunk_length) { 240 WriteVariableWidthUnsigned(this, val, chunk_length); 241 } 242 243 void BitWriterInterface::WriteVariableWidthU32(uint32_t val, 244 size_t chunk_length) { 245 WriteVariableWidthUnsigned(this, val, chunk_length); 246 } 247 248 void BitWriterInterface::WriteVariableWidthU16(uint16_t val, 249 size_t chunk_length) { 250 WriteVariableWidthUnsigned(this, val, chunk_length); 251 } 252 253 void BitWriterInterface::WriteVariableWidthU8(uint8_t val, 254 size_t chunk_length) { 255 WriteVariableWidthUnsigned(this, val, chunk_length); 256 } 257 258 void BitWriterInterface::WriteVariableWidthS64(int64_t val, 259 size_t chunk_length, 260 size_t zigzag_exponent) { 261 WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 262 } 263 264 void BitWriterInterface::WriteVariableWidthS32(int32_t val, 265 size_t chunk_length, 266 size_t zigzag_exponent) { 267 WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 268 } 269 270 void BitWriterInterface::WriteVariableWidthS16(int16_t val, 271 size_t chunk_length, 272 size_t zigzag_exponent) { 273 WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 274 } 275 276 void BitWriterInterface::WriteVariableWidthS8(int8_t val, 277 size_t chunk_length, 278 size_t zigzag_exponent) { 279 WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 280 } 281 282 void BitWriterInterface::WriteFixedWidth(uint64_t val, uint64_t max_val) { 283 if (val > max_val) { 284 assert(0 && "WriteFixedWidth: value too wide"); 285 return; 286 } 287 288 const size_t num_bits = 1 + Log2U64(max_val); 289 WriteBits(val, num_bits); 290 } 291 292 BitWriterWord64::BitWriterWord64(size_t reserve_bits) : end_(0) { 293 buffer_.reserve(NumBitsToNumWords<64>(reserve_bits)); 294 } 295 296 void BitWriterWord64::WriteBits(uint64_t bits, size_t num_bits) { 297 // Check that |bits| and |num_bits| are valid and consistent. 298 assert(num_bits <= 64); 299 const bool is_little_endian = IsLittleEndian(); 300 assert(is_little_endian && "Big-endian architecture support not implemented"); 301 if (!is_little_endian) return; 302 303 if (num_bits == 0) return; 304 305 bits = GetLowerBits(bits, num_bits); 306 307 EmitSequence(bits, num_bits); 308 309 // Offset from the start of the current word. 310 const size_t offset = end_ % 64; 311 312 if (offset == 0) { 313 // If no offset, simply add |bits| as a new word to the buffer_. 314 buffer_.push_back(bits); 315 } else { 316 // Shift bits and add them to the current word after offset. 317 const uint64_t first_word = bits << offset; 318 buffer_.back() |= first_word; 319 320 // If we don't overflow to the next word, there is nothing more to do. 321 322 if (offset + num_bits > 64) { 323 // We overflow to the next word. 324 const uint64_t second_word = bits >> (64 - offset); 325 // Add remaining bits as a new word to buffer_. 326 buffer_.push_back(second_word); 327 } 328 } 329 330 // Move end_ into position for next write. 331 end_ += num_bits; 332 assert(buffer_.size() * 64 >= end_); 333 } 334 335 bool BitReaderInterface::ReadVariableWidthU64(uint64_t* val, 336 size_t chunk_length) { 337 return ReadVariableWidthUnsigned(this, val, chunk_length); 338 } 339 340 bool BitReaderInterface::ReadVariableWidthU32(uint32_t* val, 341 size_t chunk_length) { 342 return ReadVariableWidthUnsigned(this, val, chunk_length); 343 } 344 345 bool BitReaderInterface::ReadVariableWidthU16(uint16_t* val, 346 size_t chunk_length) { 347 return ReadVariableWidthUnsigned(this, val, chunk_length); 348 } 349 350 bool BitReaderInterface::ReadVariableWidthU8(uint8_t* val, 351 size_t chunk_length) { 352 return ReadVariableWidthUnsigned(this, val, chunk_length); 353 } 354 355 bool BitReaderInterface::ReadVariableWidthS64(int64_t* val, 356 size_t chunk_length, 357 size_t zigzag_exponent) { 358 return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 359 } 360 361 bool BitReaderInterface::ReadVariableWidthS32(int32_t* val, 362 size_t chunk_length, 363 size_t zigzag_exponent) { 364 return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 365 } 366 367 bool BitReaderInterface::ReadVariableWidthS16(int16_t* val, 368 size_t chunk_length, 369 size_t zigzag_exponent) { 370 return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 371 } 372 373 bool BitReaderInterface::ReadVariableWidthS8(int8_t* val, 374 size_t chunk_length, 375 size_t zigzag_exponent) { 376 return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent); 377 } 378 379 bool BitReaderInterface::ReadFixedWidth(uint64_t* val, uint64_t max_val) { 380 const size_t num_bits = 1 + Log2U64(max_val); 381 return ReadBits(val, num_bits) == num_bits; 382 } 383 384 BitReaderWord64::BitReaderWord64(std::vector<uint64_t>&& buffer) 385 : buffer_(std::move(buffer)), pos_(0) {} 386 387 BitReaderWord64::BitReaderWord64(const std::vector<uint8_t>& buffer) 388 : buffer_(ToBuffer64(buffer)), pos_(0) {} 389 390 BitReaderWord64::BitReaderWord64(const void* buffer, size_t num_bytes) 391 : buffer_(ToBuffer64(buffer, num_bytes)), pos_(0) {} 392 393 size_t BitReaderWord64::ReadBits(uint64_t* bits, size_t num_bits) { 394 assert(num_bits <= 64); 395 const bool is_little_endian = IsLittleEndian(); 396 assert(is_little_endian && "Big-endian architecture support not implemented"); 397 if (!is_little_endian) return 0; 398 399 if (ReachedEnd()) 400 return 0; 401 402 // Index of the current word. 403 const size_t index = pos_ / 64; 404 405 // Bit position in the current word where we start reading. 406 const size_t offset = pos_ % 64; 407 408 // Read all bits from the current word (it might be too much, but 409 // excessive bits will be removed later). 410 *bits = buffer_[index] >> offset; 411 412 const size_t num_read_from_first_word = std::min(64 - offset, num_bits); 413 pos_ += num_read_from_first_word; 414 415 if (pos_ >= buffer_.size() * 64) { 416 // Reached end of buffer_. 417 return num_read_from_first_word; 418 } 419 420 if (offset + num_bits > 64) { 421 // Requested |num_bits| overflows to next word. 422 // Write all bits from the beginning of next word to *bits after offset. 423 *bits |= buffer_[index + 1] << (64 - offset); 424 pos_ += offset + num_bits - 64; 425 } 426 427 // We likely have written more bits than requested. Clear excessive bits. 428 *bits = GetLowerBits(*bits, num_bits); 429 return num_bits; 430 } 431 432 bool BitReaderWord64::ReachedEnd() const { 433 return pos_ >= buffer_.size() * 64; 434 } 435 436 bool BitReaderWord64::OnlyZeroesLeft() const { 437 if (ReachedEnd()) 438 return true; 439 440 const size_t index = pos_ / 64; 441 if (index < buffer_.size() - 1) 442 return false; 443 444 assert(index == buffer_.size() - 1); 445 446 const size_t offset = pos_ % 64; 447 const uint64_t remaining_bits = buffer_[index] >> offset; 448 return !remaining_bits; 449 } 450 451 } // namespace spvutils 452